blob: f4f3d8a7e306212396abe92ba4c81247e084cd55 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
/*
 * XML_DIR_SEP: directory separator character. A backslash on native
 * WIN32 builds (Cygwin excluded), a forward slash everywhere else.
 */
#if !defined(WIN32) || defined(__CYGWIN__)
#define XML_DIR_SEP '/'
#else
#define XML_DIR_SEP '\\'
#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser accepts
 * to process. It is a safety boundary rather than a limitation of the
 * parser itself, and it can be disabled with the XML_PARSE_HUGE parser
 * option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +000092
/* NOTE(review): presumably selects the SAX2 code paths - confirm at use. */
#define SAX2 1

/*
 * Sizes, in units determined by the users of these macros, of the
 * large and regular working buffers.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
99
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated and read-only: both the strings and the
 * array of pointers are const so it cannot be modified at run time.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
108
Daniel Veillarda07050d2003-10-19 14:46:32 +0000109
Owen Taylor3473f882001-02-23 17:55:21 +0000110/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000111xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
112 const xmlChar **str);
113
Daniel Veillard7d515752003-09-26 19:12:37 +0000114static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000115xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
116 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000117 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000118 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000119
Daniel Veillard37334572008-07-31 08:20:02 +0000120static int
121xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
122 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000124static void
125xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
126 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000127#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000128
Daniel Veillard7d515752003-09-26 19:12:37 +0000129static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000130xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
131 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000133static int
134xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
135
Daniel Veillarde57ec792003-09-10 10:50:59 +0000136/************************************************************************
137 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000138 * Some factorized error routines *
139 * *
140 ************************************************************************/
141
142/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000143 * xmlErrAttributeDup:
144 * @ctxt: an XML parser context
145 * @prefix: the attribute prefix
146 * @localname: the attribute localname
147 *
148 * Handle a redefinition of attribute error
149 */
150static void
151xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
152 const xmlChar * localname)
153{
Daniel Veillard157fee02003-10-31 10:36:03 +0000154 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
155 (ctxt->instate == XML_PARSER_EOF))
156 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000157 if (ctxt != NULL)
158 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000159 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000160 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000161 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
162 (const char *) localname, NULL, NULL, 0, 0,
163 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000164 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000165 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000166 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
167 (const char *) prefix, (const char *) localname,
168 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
169 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000170 if (ctxt != NULL) {
171 ctxt->wellFormed = 0;
172 if (ctxt->recovery == 0)
173 ctxt->disableSAX = 1;
174 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000175}
176
177/**
178 * xmlFatalErr:
179 * @ctxt: an XML parser context
180 * @error: the error number
181 * @extra: extra information string
182 *
183 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
184 */
185static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000186xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187{
188 const char *errmsg;
189
Daniel Veillard157fee02003-10-31 10:36:03 +0000190 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
191 (ctxt->instate == XML_PARSER_EOF))
192 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 switch (error) {
194 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000195 errmsg = "CharRef: invalid hexadecimal value\n";
196 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000197 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000198 errmsg = "CharRef: invalid decimal value\n";
199 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000200 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000201 errmsg = "CharRef: invalid value\n";
202 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000203 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000204 errmsg = "internal error";
205 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000206 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000207 errmsg = "PEReference at end of document\n";
208 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000209 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000210 errmsg = "PEReference in prolog\n";
211 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000212 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000213 errmsg = "PEReference in epilog\n";
214 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000215 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000216 errmsg = "PEReference: no name\n";
217 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000218 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000219 errmsg = "PEReference: expecting ';'\n";
220 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000221 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000222 errmsg = "Detected an entity reference loop\n";
223 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000224 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000225 errmsg = "EntityValue: \" or ' expected\n";
226 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000227 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000228 errmsg = "PEReferences forbidden in internal subset\n";
229 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000230 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000231 errmsg = "EntityValue: \" or ' expected\n";
232 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000233 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000234 errmsg = "AttValue: \" or ' expected\n";
235 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000236 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000237 errmsg = "Unescaped '<' not allowed in attributes values\n";
238 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000239 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000240 errmsg = "SystemLiteral \" or ' expected\n";
241 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000242 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000243 errmsg = "Unfinished System or Public ID \" or ' expected\n";
244 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000245 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000246 errmsg = "Sequence ']]>' not allowed in content\n";
247 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000248 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000249 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
250 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000251 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000252 errmsg = "PUBLIC, the Public Identifier is missing\n";
253 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000254 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 errmsg = "Comment must not contain '--' (double-hyphen)\n";
256 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000257 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000258 errmsg = "xmlParsePI : no target name\n";
259 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000260 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000261 errmsg = "Invalid PI name\n";
262 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000263 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000264 errmsg = "NOTATION: Name expected here\n";
265 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000266 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000267 errmsg = "'>' required to close NOTATION declaration\n";
268 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000270 errmsg = "Entity value required\n";
271 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000273 errmsg = "Fragment not allowed";
274 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000275 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276 errmsg = "'(' required to start ATTLIST enumeration\n";
277 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000278 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000279 errmsg = "NmToken expected in ATTLIST enumeration\n";
280 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000282 errmsg = "')' required to finish ATTLIST enumeration\n";
283 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000284 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "ContentDecl : Name or '(' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg =
298 "PEReference: forbidden within markup decl in internal subset\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "expected '>'\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section '[' expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Content error in the external subset\n";
308 break;
309 case XML_ERR_CONDSEC_INVALID_KEYWORD:
310 errmsg =
311 "conditional section INCLUDE or IGNORE keyword expected\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "XML conditional section not closed\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Text declaration '<?xml' required\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "parsing XML declaration: '?>' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "external parsed entities cannot be standalone\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityRef: expecting ';'\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "DOCTYPE improperly terminated\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "EndTag: '</' not found\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "expected '='\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "String not closed expecting \" or '\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "String not started expecting ' or \"\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "Invalid XML encoding name\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "standalone accepts only 'yes' or 'no'\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Document is empty\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "Extra content at the end of the document\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "chunk is not well balanced\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "extra content at the end of well balanced chunk\n";
360 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000361 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "Malformed declaration expecting version\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 case:
366 errmsg = "\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 default:
370 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000372 if (ctxt != NULL)
373 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000374 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
376 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000377 if (ctxt != NULL) {
378 ctxt->wellFormed = 0;
379 if (ctxt->recovery == 0)
380 ctxt->disableSAX = 1;
381 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382}
383
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000384/**
385 * xmlFatalErrMsg:
386 * @ctxt: an XML parser context
387 * @error: the error number
388 * @msg: the error message
389 *
390 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
391 */
392static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000395{
Daniel Veillard157fee02003-10-31 10:36:03 +0000396 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
397 (ctxt->instate == XML_PARSER_EOF))
398 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL)
400 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000401 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000403 if (ctxt != NULL) {
404 ctxt->wellFormed = 0;
405 if (ctxt->recovery == 0)
406 ctxt->disableSAX = 1;
407 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000408}
409
410/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000411 * xmlWarningMsg:
412 * @ctxt: an XML parser context
413 * @error: the error number
414 * @msg: the error message
415 * @str1: extra data
416 * @str2: extra data
417 *
418 * Handle a warning.
419 */
420static void
421xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
422 const char *msg, const xmlChar *str1, const xmlChar *str2)
423{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000425
Daniel Veillard157fee02003-10-31 10:36:03 +0000426 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
427 (ctxt->instate == XML_PARSER_EOF))
428 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000429 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
430 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000431 schannel = ctxt->sax->serror;
432 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000433 (ctxt->sax) ? ctxt->sax->warning : NULL,
434 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000435 ctxt, NULL, XML_FROM_PARSER, error,
436 XML_ERR_WARNING, NULL, 0,
437 (const char *) str1, (const char *) str2, NULL, 0, 0,
438 msg, (const char *) str1, (const char *) str2);
439}
440
441/**
442 * xmlValidityError:
443 * @ctxt: an XML parser context
444 * @error: the error number
445 * @msg: the error message
446 * @str1: extra data
447 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000448 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000449 */
450static void
451xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000452 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000453{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000454 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000455
456 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
457 (ctxt->instate == XML_PARSER_EOF))
458 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000459 if (ctxt != NULL) {
460 ctxt->errNo = error;
461 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
462 schannel = ctxt->sax->serror;
463 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000464 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000465 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466 ctxt, NULL, XML_FROM_DTD, error,
467 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000468 (const char *) str2, NULL, 0, 0,
469 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000470 if (ctxt != NULL) {
471 ctxt->valid = 0;
472 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000473}
474
475/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 * xmlFatalErrMsgInt:
477 * @ctxt: an XML parser context
478 * @error: the error number
479 * @msg: the error message
480 * @val: an integer value
481 *
482 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
483 */
484static void
485xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000487{
Daniel Veillard157fee02003-10-31 10:36:03 +0000488 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
489 (ctxt->instate == XML_PARSER_EOF))
490 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000491 if (ctxt != NULL)
492 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000493 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
495 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000496 if (ctxt != NULL) {
497 ctxt->wellFormed = 0;
498 if (ctxt->recovery == 0)
499 ctxt->disableSAX = 1;
500 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000501}
502
503/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000504 * xmlFatalErrMsgStrIntStr:
505 * @ctxt: an XML parser context
506 * @error: the error number
507 * @msg: the error message
508 * @str1: an string info
509 * @val: an integer value
510 * @str2: an string info
511 *
512 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
513 */
514static void
515xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, int val,
517 const xmlChar *str2)
518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000522 if (ctxt != NULL)
523 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000524 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000525 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
526 NULL, 0, (const char *) str1, (const char *) str2,
527 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000528 if (ctxt != NULL) {
529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000533}
534
535/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000536 * xmlFatalErrMsgStr:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @val: a string value
541 *
542 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
543 */
544static void
545xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000546 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000547{
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if (ctxt != NULL)
552 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000553 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000554 XML_FROM_PARSER, error, XML_ERR_FATAL,
555 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
556 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000557 if (ctxt != NULL) {
558 ctxt->wellFormed = 0;
559 if (ctxt->recovery == 0)
560 ctxt->disableSAX = 1;
561 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000562}
563
564/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000565 * xmlErrMsgStr:
566 * @ctxt: an XML parser context
567 * @error: the error number
568 * @msg: the error message
569 * @val: a string value
570 *
571 * Handle a non fatal parser error
572 */
573static void
574xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
575 const char *msg, const xmlChar * val)
576{
Daniel Veillard157fee02003-10-31 10:36:03 +0000577 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
578 (ctxt->instate == XML_PARSER_EOF))
579 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000580 if (ctxt != NULL)
581 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000582 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000583 XML_FROM_PARSER, error, XML_ERR_ERROR,
584 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
585 val);
586}
587
588/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000589 * xmlNsErr:
590 * @ctxt: an XML parser context
591 * @error: the error number
592 * @msg: the message
593 * @info1: extra information string
594 * @info2: extra information string
595 *
596 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
597 */
598static void
599xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
600 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000601 const xmlChar * info1, const xmlChar * info2,
602 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000603{
Daniel Veillard157fee02003-10-31 10:36:03 +0000604 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605 (ctxt->instate == XML_PARSER_EOF))
606 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000607 if (ctxt != NULL)
608 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000609 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000610 XML_ERR_ERROR, NULL, 0, (const char *) info1,
611 (const char *) info2, (const char *) info3, 0, 0, msg,
612 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000613 if (ctxt != NULL)
614 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000615}
616
Daniel Veillard37334572008-07-31 08:20:02 +0000617/**
618 * xmlNsWarn
619 * @ctxt: an XML parser context
620 * @error: the error number
621 * @msg: the message
622 * @info1: extra information string
623 * @info2: extra information string
624 *
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626 */
627static void
628xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg,
630 const xmlChar * info1, const xmlChar * info2,
631 const xmlChar * info3)
632{
633 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634 (ctxt->instate == XML_PARSER_EOF))
635 return;
636 if (ctxt != NULL)
637 ctxt->errNo = error;
638 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
639 XML_ERR_WARNING, NULL, 0, (const char *) info1,
640 (const char *) info2, (const char *) info3, 0, 0, msg,
641 info1, info2, info3);
642}
643
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000644/************************************************************************
645 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646 * Library wide options *
647 * *
648 ************************************************************************/
649
650/**
651 * xmlHasFeature:
652 * @feature: the feature to be examined
653 *
654 * Examines if the library has been compiled with a given feature.
655 *
 * Returns a non-zero value if the feature exists; returns zero (0) if the
 * feature does not exist or an unknown feature is requested.
659 */
660int
661xmlHasFeature(xmlFeature feature)
662{
663 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_THREAD_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_TREE_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_OUTPUT_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_PUSH_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_READER_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_PATTERN_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_WRITER_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_SAX1_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_FTP_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_HTTP_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_VALID_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_HTML_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_LEGACY_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_C14N_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_CATALOG_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_XPATH_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_XPTR_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_XINCLUDE_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef LIBXML_ICONV_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_ISO8859X_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000784 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000785#ifdef LIBXML_UNICODE_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000790 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000791#ifdef LIBXML_REGEXP_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000796 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797#ifdef LIBXML_AUTOMATA_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000802 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000803#ifdef LIBXML_EXPR_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000808 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000809#ifdef LIBXML_SCHEMAS_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_SCHEMATRON_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_MODULES_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_DEBUG_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef DEBUG_MEMORY_LOCATION
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_DEBUG_RUNTIME
840 return(1);
841#else
842 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000843#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000844 case XML_WITH_ZLIB:
845#ifdef LIBXML_ZLIB_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850 default:
851 break;
852 }
853 return(0);
854}
855
/************************************************************************
 *                                                                      *
 *                SAX2 defaulted attributes handling                    *
 *                                                                      *
 ************************************************************************/
861
862/**
863 * xmlDetectSAX2:
864 * @ctxt: an XML parser context
865 *
866 * Do the SAX2 detection and specific intialization
867 */
868static void
869xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
870 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000871#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000872 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
873 ((ctxt->sax->startElementNs != NULL) ||
874 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000875#else
876 ctxt->sax2 = 1;
877#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000878
879 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
880 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
881 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000882 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
883 (ctxt->str_xml_ns == NULL)) {
884 xmlErrMemory(ctxt, NULL);
885 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886}
887
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888typedef struct _xmlDefAttrs xmlDefAttrs;
889typedef xmlDefAttrs *xmlDefAttrsPtr;
890struct _xmlDefAttrs {
891 int nbAttrs; /* number of defaulted attributes on that element */
892 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000893 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000895
896/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000897 * xmlAttrNormalizeSpace:
898 * @src: the source string
899 * @dst: the target string
900 *
901 * Normalize the space in non CDATA attribute values:
902 * If the attribute type is not CDATA, then the XML processor MUST further
903 * process the normalized attribute value by discarding any leading and
904 * trailing space (#x20) characters, and by replacing sequences of space
905 * (#x20) characters by a single space (#x20) character.
906 * Note that the size of dst need to be at least src, and if one doesn't need
907 * to preserve dst (and it doesn't come from a dictionary or read-only) then
908 * passing src as dst is just fine.
909 *
910 * Returns a pointer to the normalized value (dst) or NULL if no conversion
911 * is needed.
912 */
913static xmlChar *
914xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
915{
916 if ((src == NULL) || (dst == NULL))
917 return(NULL);
918
919 while (*src == 0x20) src++;
920 while (*src != 0) {
921 if (*src == 0x20) {
922 while (*src == 0x20) src++;
923 if (*src != 0)
924 *dst++ = 0x20;
925 } else {
926 *dst++ = *src++;
927 }
928 }
929 *dst = 0;
930 if (dst == src)
931 return(NULL);
932 return(dst);
933}
934
935/**
936 * xmlAttrNormalizeSpace2:
937 * @src: the source string
938 *
939 * Normalize the space in non CDATA attribute values, a slightly more complex
940 * front end to avoid allocation problems when running on attribute values
941 * coming from the input.
942 *
943 * Returns a pointer to the normalized value (dst) or NULL if no conversion
944 * is needed.
945 */
946static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +0000947xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000948{
949 int i;
950 int remove_head = 0;
951 int need_realloc = 0;
952 const xmlChar *cur;
953
954 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
955 return(NULL);
956 i = *len;
957 if (i <= 0)
958 return(NULL);
959
960 cur = src;
961 while (*cur == 0x20) {
962 cur++;
963 remove_head++;
964 }
965 while (*cur != 0) {
966 if (*cur == 0x20) {
967 cur++;
968 if ((*cur == 0x20) || (*cur == 0)) {
969 need_realloc = 1;
970 break;
971 }
972 } else
973 cur++;
974 }
975 if (need_realloc) {
976 xmlChar *ret;
977
978 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
979 if (ret == NULL) {
980 xmlErrMemory(ctxt, NULL);
981 return(NULL);
982 }
983 xmlAttrNormalizeSpace(ret, ret);
984 *len = (int) strlen((const char *)ret);
985 return(ret);
986 } else if (remove_head) {
987 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +0000988 memmove(src, src + remove_head, 1 + *len);
989 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000990 }
991 return(NULL);
992}
993
994/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995 * xmlAddDefAttrs:
996 * @ctxt: an XML parser context
997 * @fullname: the element fullname
998 * @fullattr: the attribute fullname
999 * @value: the attribute value
1000 *
1001 * Add a defaulted attribute for an element
1002 */
1003static void
1004xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1005 const xmlChar *fullname,
1006 const xmlChar *fullattr,
1007 const xmlChar *value) {
1008 xmlDefAttrsPtr defaults;
1009 int len;
1010 const xmlChar *name;
1011 const xmlChar *prefix;
1012
Daniel Veillard6a31b832008-03-26 14:06:44 +00001013 /*
1014 * Allows to detect attribute redefinitions
1015 */
1016 if (ctxt->attsSpecial != NULL) {
1017 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1018 return;
1019 }
1020
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001022 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001023 if (ctxt->attsDefault == NULL)
1024 goto mem_error;
1025 }
1026
1027 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001028 * split the element name into prefix:localname , the string found
1029 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001030 */
1031 name = xmlSplitQName3(fullname, &len);
1032 if (name == NULL) {
1033 name = xmlDictLookup(ctxt->dict, fullname, -1);
1034 prefix = NULL;
1035 } else {
1036 name = xmlDictLookup(ctxt->dict, name, -1);
1037 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1038 }
1039
1040 /*
1041 * make sure there is some storage
1042 */
1043 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1044 if (defaults == NULL) {
1045 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001046 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001047 if (defaults == NULL)
1048 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001049 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001050 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001051 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1052 defaults, NULL) < 0) {
1053 xmlFree(defaults);
1054 goto mem_error;
1055 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001057 xmlDefAttrsPtr temp;
1058
1059 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001060 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001061 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001063 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001064 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001065 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1066 defaults, NULL) < 0) {
1067 xmlFree(defaults);
1068 goto mem_error;
1069 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001070 }
1071
1072 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001073 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001074 * are within the DTD and hen not associated to namespace names.
1075 */
1076 name = xmlSplitQName3(fullattr, &len);
1077 if (name == NULL) {
1078 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1079 prefix = NULL;
1080 } else {
1081 name = xmlDictLookup(ctxt->dict, name, -1);
1082 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1083 }
1084
Daniel Veillardae0765b2008-07-31 19:54:59 +00001085 defaults->values[5 * defaults->nbAttrs] = name;
1086 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 /* intern the string and precompute the end */
1088 len = xmlStrlen(value);
1089 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001090 defaults->values[5 * defaults->nbAttrs + 2] = value;
1091 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1092 if (ctxt->external)
1093 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1094 else
1095 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 defaults->nbAttrs++;
1097
1098 return;
1099
1100mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001101 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return;
1103}
1104
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001105/**
1106 * xmlAddSpecialAttr:
1107 * @ctxt: an XML parser context
1108 * @fullname: the element fullname
1109 * @fullattr: the attribute fullname
1110 * @type: the attribute type
1111 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001112 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001113 */
1114static void
1115xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1116 const xmlChar *fullname,
1117 const xmlChar *fullattr,
1118 int type)
1119{
1120 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001121 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001122 if (ctxt->attsSpecial == NULL)
1123 goto mem_error;
1124 }
1125
Daniel Veillardac4118d2008-01-11 05:27:32 +00001126 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1127 return;
1128
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001129 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1130 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001131 return;
1132
1133mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001134 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001135 return;
1136}
1137
Daniel Veillard4432df22003-09-28 18:58:27 +00001138/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001139 * xmlCleanSpecialAttrCallback:
1140 *
1141 * Removes CDATA attributes from the special attribute table
1142 */
1143static void
1144xmlCleanSpecialAttrCallback(void *payload, void *data,
1145 const xmlChar *fullname, const xmlChar *fullattr,
1146 const xmlChar *unused ATTRIBUTE_UNUSED) {
1147 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1148
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001149 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001150 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1151 }
1152}
1153
1154/**
1155 * xmlCleanSpecialAttr:
1156 * @ctxt: an XML parser context
1157 *
1158 * Trim the list of attributes defined to remove all those of type
1159 * CDATA as they are not special. This call should be done when finishing
1160 * to parse the DTD and before starting to parse the document root.
1161 */
1162static void
1163xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1164{
1165 if (ctxt->attsSpecial == NULL)
1166 return;
1167
1168 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1169
1170 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1171 xmlHashFree(ctxt->attsSpecial, NULL);
1172 ctxt->attsSpecial = NULL;
1173 }
1174 return;
1175}
1176
1177/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001178 * xmlCheckLanguageID:
1179 * @lang: pointer to the string value
1180 *
1181 * Checks that the value conforms to the LanguageID production:
1182 *
1183 * NOTE: this is somewhat deprecated, those productions were removed from
1184 * the XML Second edition.
1185 *
1186 * [33] LanguageID ::= Langcode ('-' Subcode)*
1187 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1188 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1189 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1190 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1191 * [38] Subcode ::= ([a-z] | [A-Z])+
1192 *
1193 * Returns 1 if correct 0 otherwise
1194 **/
1195int
1196xmlCheckLanguageID(const xmlChar * lang)
1197{
1198 const xmlChar *cur = lang;
1199
1200 if (cur == NULL)
1201 return (0);
1202 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1203 ((cur[0] == 'I') && (cur[1] == '-'))) {
1204 /*
1205 * IANA code
1206 */
1207 cur += 2;
1208 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1209 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1210 cur++;
1211 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1212 ((cur[0] == 'X') && (cur[1] == '-'))) {
1213 /*
1214 * User code
1215 */
1216 cur += 2;
1217 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1218 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1219 cur++;
1220 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1221 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1222 /*
1223 * ISO639
1224 */
1225 cur++;
1226 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1227 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1228 cur++;
1229 else
1230 return (0);
1231 } else
1232 return (0);
1233 while (cur[0] != 0) { /* non input consuming */
1234 if (cur[0] != '-')
1235 return (0);
1236 cur++;
1237 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1238 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1239 cur++;
1240 else
1241 return (0);
1242 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1243 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1244 cur++;
1245 }
1246 return (1);
1247}
1248
/************************************************************************
 *                                                                      *
 *                Parser stacks related functions and macros            *
 *                                                                      *
 ************************************************************************/
1254
1255xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1256 const xmlChar ** str);
1257
Daniel Veillard0fb18932003-09-07 09:14:37 +00001258#ifdef SAX2
1259/**
1260 * nsPush:
1261 * @ctxt: an XML parser context
1262 * @prefix: the namespace prefix or NULL
1263 * @URL: the namespace name
1264 *
1265 * Pushes a new parser namespace on top of the ns stack
1266 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001267 * Returns -1 in case of error, -2 if the namespace should be discarded
1268 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001269 */
1270static int
1271nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1272{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001273 if (ctxt->options & XML_PARSE_NSCLEAN) {
1274 int i;
1275 for (i = 0;i < ctxt->nsNr;i += 2) {
1276 if (ctxt->nsTab[i] == prefix) {
1277 /* in scope */
1278 if (ctxt->nsTab[i + 1] == URL)
1279 return(-2);
1280 /* out of scope keep it */
1281 break;
1282 }
1283 }
1284 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001285 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1286 ctxt->nsMax = 10;
1287 ctxt->nsNr = 0;
1288 ctxt->nsTab = (const xmlChar **)
1289 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1290 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001291 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001292 ctxt->nsMax = 0;
1293 return (-1);
1294 }
1295 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001296 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001297 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001298 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1299 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1300 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001301 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001302 ctxt->nsMax /= 2;
1303 return (-1);
1304 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001305 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001306 }
1307 ctxt->nsTab[ctxt->nsNr++] = prefix;
1308 ctxt->nsTab[ctxt->nsNr++] = URL;
1309 return (ctxt->nsNr);
1310}
1311/**
1312 * nsPop:
1313 * @ctxt: an XML parser context
1314 * @nr: the number to pop
1315 *
1316 * Pops the top @nr parser prefix/namespace from the ns stack
1317 *
1318 * Returns the number of namespaces removed
1319 */
1320static int
1321nsPop(xmlParserCtxtPtr ctxt, int nr)
1322{
1323 int i;
1324
1325 if (ctxt->nsTab == NULL) return(0);
1326 if (ctxt->nsNr < nr) {
1327 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1328 nr = ctxt->nsNr;
1329 }
1330 if (ctxt->nsNr <= 0)
1331 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001332
Daniel Veillard0fb18932003-09-07 09:14:37 +00001333 for (i = 0;i < nr;i++) {
1334 ctxt->nsNr--;
1335 ctxt->nsTab[ctxt->nsNr] = NULL;
1336 }
1337 return(nr);
1338}
1339#endif
1340
1341static int
1342xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1343 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001344 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001345 int maxatts;
1346
1347 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001348 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001349 atts = (const xmlChar **)
1350 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001351 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001352 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001353 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1354 if (attallocs == NULL) goto mem_error;
1355 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001356 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001357 } else if (nr + 5 > ctxt->maxatts) {
1358 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001359 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1360 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001361 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001362 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1364 (maxatts / 5) * sizeof(int));
1365 if (attallocs == NULL) goto mem_error;
1366 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001367 ctxt->maxatts = maxatts;
1368 }
1369 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001371 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001372 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001373}
1374
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001375/**
1376 * inputPush:
1377 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001378 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001379 *
1380 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001381 *
1382 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001383 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001384int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1386{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001387 if ((ctxt == NULL) || (value == NULL))
1388 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 if (ctxt->inputNr >= ctxt->inputMax) {
1390 ctxt->inputMax *= 2;
1391 ctxt->inputTab =
1392 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1393 ctxt->inputMax *
1394 sizeof(ctxt->inputTab[0]));
1395 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001396 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001397 return (0);
1398 }
1399 }
1400 ctxt->inputTab[ctxt->inputNr] = value;
1401 ctxt->input = value;
1402 return (ctxt->inputNr++);
1403}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001404/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001405 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001406 * @ctxt: an XML parser context
1407 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001408 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001409 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001410 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413inputPop(xmlParserCtxtPtr ctxt)
1414{
1415 xmlParserInputPtr ret;
1416
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001417 if (ctxt == NULL)
1418 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001420 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001421 ctxt->inputNr--;
1422 if (ctxt->inputNr > 0)
1423 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1424 else
1425 ctxt->input = NULL;
1426 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001427 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001428 return (ret);
1429}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001430/**
1431 * nodePush:
1432 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001433 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001434 *
1435 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001436 *
1437 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001438 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001439int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001440nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1441{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001442 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001443 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001444 xmlNodePtr *tmp;
1445
1446 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1447 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001448 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001449 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001450 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001451 return (0);
1452 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001453 ctxt->nodeTab = tmp;
1454 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001455 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001456 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1457 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001458 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001459 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001460 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001461 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001462 return(0);
1463 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001464 ctxt->nodeTab[ctxt->nodeNr] = value;
1465 ctxt->node = value;
1466 return (ctxt->nodeNr++);
1467}
Daniel Veillard8915c152008-08-26 13:05:34 +00001468
Daniel Veillard1c732d22002-11-30 11:22:59 +00001469/**
1470 * nodePop:
1471 * @ctxt: an XML parser context
1472 *
1473 * Pops the top element node from the node stack
1474 *
1475 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001476 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001477xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001478nodePop(xmlParserCtxtPtr ctxt)
1479{
1480 xmlNodePtr ret;
1481
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001482 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001483 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001484 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001485 ctxt->nodeNr--;
1486 if (ctxt->nodeNr > 0)
1487 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1488 else
1489 ctxt->node = NULL;
1490 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001491 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001492 return (ret);
1493}
Daniel Veillarda2351322004-06-27 12:08:10 +00001494
1495#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001496/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001497 * nameNsPush:
1498 * @ctxt: an XML parser context
1499 * @value: the element name
1500 * @prefix: the element prefix
1501 * @URI: the element namespace name
1502 *
1503 * Pushes a new element name/prefix/URL on top of the name stack
1504 *
1505 * Returns -1 in case of error, the index in the stack otherwise
1506 */
1507static int
1508nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1509 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1510{
1511 if (ctxt->nameNr >= ctxt->nameMax) {
1512 const xmlChar * *tmp;
1513 void **tmp2;
1514 ctxt->nameMax *= 2;
1515 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1516 ctxt->nameMax *
1517 sizeof(ctxt->nameTab[0]));
1518 if (tmp == NULL) {
1519 ctxt->nameMax /= 2;
1520 goto mem_error;
1521 }
1522 ctxt->nameTab = tmp;
1523 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1524 ctxt->nameMax * 3 *
1525 sizeof(ctxt->pushTab[0]));
1526 if (tmp2 == NULL) {
1527 ctxt->nameMax /= 2;
1528 goto mem_error;
1529 }
1530 ctxt->pushTab = tmp2;
1531 }
1532 ctxt->nameTab[ctxt->nameNr] = value;
1533 ctxt->name = value;
1534 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1535 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001536 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001537 return (ctxt->nameNr++);
1538mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001539 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001540 return (-1);
1541}
1542/**
1543 * nameNsPop:
1544 * @ctxt: an XML parser context
1545 *
1546 * Pops the top element/prefix/URI name from the name stack
1547 *
1548 * Returns the name just removed
1549 */
1550static const xmlChar *
1551nameNsPop(xmlParserCtxtPtr ctxt)
1552{
1553 const xmlChar *ret;
1554
1555 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001556 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001557 ctxt->nameNr--;
1558 if (ctxt->nameNr > 0)
1559 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1560 else
1561 ctxt->name = NULL;
1562 ret = ctxt->nameTab[ctxt->nameNr];
1563 ctxt->nameTab[ctxt->nameNr] = NULL;
1564 return (ret);
1565}
Daniel Veillarda2351322004-06-27 12:08:10 +00001566#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001567
1568/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001569 * namePush:
1570 * @ctxt: an XML parser context
1571 * @value: the element name
1572 *
1573 * Pushes a new element name on top of the name stack
1574 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001575 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001577int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001578namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001579{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001580 if (ctxt == NULL) return (-1);
1581
Daniel Veillard1c732d22002-11-30 11:22:59 +00001582 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001583 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001584 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001585 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001586 ctxt->nameMax *
1587 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001588 if (tmp == NULL) {
1589 ctxt->nameMax /= 2;
1590 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001591 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001593 }
1594 ctxt->nameTab[ctxt->nameNr] = value;
1595 ctxt->name = value;
1596 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001597mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001598 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001599 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001600}
1601/**
1602 * namePop:
1603 * @ctxt: an XML parser context
1604 *
1605 * Pops the top element name from the name stack
1606 *
1607 * Returns the name just removed
1608 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001609const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001610namePop(xmlParserCtxtPtr ctxt)
1611{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001612 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001613
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001614 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1615 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001616 ctxt->nameNr--;
1617 if (ctxt->nameNr > 0)
1618 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1619 else
1620 ctxt->name = NULL;
1621 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001622 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001623 return (ret);
1624}
Owen Taylor3473f882001-02-23 17:55:21 +00001625
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001626static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001627 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001628 int *tmp;
1629
Owen Taylor3473f882001-02-23 17:55:21 +00001630 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001631 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1632 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1633 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001635 return(0);
1636 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001637 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001638 }
1639 ctxt->spaceTab[ctxt->spaceNr] = val;
1640 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1641 return(ctxt->spaceNr++);
1642}
1643
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001644static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001645 int ret;
1646 if (ctxt->spaceNr <= 0) return(0);
1647 ctxt->spaceNr--;
1648 if (ctxt->spaceNr > 0)
1649 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1650 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001651 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001652 ret = ctxt->spaceTab[ctxt->spaceNr];
1653 ctxt->spaceTab[ctxt->spaceNr] = -1;
1654 return(ret);
1655}
1656
1657/*
1658 * Macros for accessing the content. Those should be used only by the parser,
1659 * and not exported.
1660 *
1661 * Dirty macros, i.e. one often need to make assumption on the context to
1662 * use them
1663 *
1664 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1665 * To be used with extreme caution since operations consuming
1666 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           It should be used internally by the parser only to compare
 *           against ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1671 * RAW same as CUR but in the input buffer, bypass any token
1672 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII-based substrings.
1675 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001676 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar (the macro takes no argument); must only be used
 *           to skip one non-newline ASCII character within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001679 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1680 *
1681 * NEXT Skip to the next character, this does the proper decoding
1682 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001683 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001684 * CUR_CHAR(l) returns the current unicode character (int), set l
1685 * to the number of xmlChars used for the encoding [0-5].
1686 * CUR_SCHAR same but operate on a string instead of the context
1687 * COPY_BUF copy the current unicode char to the target buffer, increment
1688 * the index
1689 * GROW, SHRINK handling of input buffers
1690 */
1691
/* byte under the input cursor; RAW and CUR have the same expansion */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* byte located (val) positions after the input cursor */
#define NXT(val) ctxt->input->cur[(val)]
/* the input cursor itself, usable as an lvalue */
#define CUR_PTR ctxt->input->cur
1696
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. Each macro builds on the
 * previous one, so comparison stops at the first mismatch.
 * Every parameter is parenthesized so that arbitrary expressions can
 * be passed safely; s is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1714
/*
 * SKIP(val): advance the input cursor, the column and ctxt->nbChars by
 * (val) in one step; no newline accounting is done. A '%' under the new
 * cursor is handed to xmlParserHandlePEReference(), and when the cursor
 * sits on a 0 byte and the input cannot be grown the input is popped.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/*
 * SKIPL(val): same as SKIP but advances one xmlChar at a time so that
 * line and column are kept right across '\n'. The loop bound is
 * parenthesized so that any expression can be passed as (val); it is
 * re-evaluated on every iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1737
Daniel Veillarda880b122003-04-21 21:36:41 +00001738#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001739 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1740 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001741 xmlSHRINK (ctxt);
1742
1743static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1744 xmlParserInputShrink(ctxt->input);
1745 if ((*ctxt->input->cur == 0) &&
1746 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1747 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001748 }
Owen Taylor3473f882001-02-23 17:55:21 +00001749
Daniel Veillarda880b122003-04-21 21:36:41 +00001750#define GROW if ((ctxt->progressive == 0) && \
1751 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001752 xmlGROW (ctxt);
1753
1754static void xmlGROW (xmlParserCtxtPtr ctxt) {
1755 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1756 if ((*ctxt->input->cur == 0) &&
1757 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1758 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001759}
Owen Taylor3473f882001-02-23 17:55:21 +00001760
/* skip blanks at the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* advance to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance column, cursor and ctxt->nbChars by one and try to
 * grow the input when the new cursor sits on a 0 byte. No newline
 * accounting is done. Kept as a plain brace block so that existing
 * call sites parse exactly as before.
 */
#define NEXT1 { \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        ctxt->nbChars++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): step over the current character, which is (l) xmlChars
 * long, updating line/column, then hand a '%' under the new cursor to
 * xmlParserHandlePEReference(). ctxt->nbChars is not touched here.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/* l must be an lvalue: the callee is passed its address */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance i, writing
 * a single xmlChar when l is 1 and going through
 * xmlCopyCharMultiByte() otherwise. i must be an lvalue.
 * NOTE: this is a bare if/else statement, not wrapped in do/while, so
 * it must not be used as the unbraced body of another if.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001787
1788/**
1789 * xmlSkipBlankChars:
1790 * @ctxt: the XML parser context
1791 *
1792 * skip all blanks character found at that point in the input streams.
1793 * It pops up finished entities in the process if allowable at that point.
1794 *
1795 * Returns the number of space chars skipped
1796 */
1797
1798int
1799xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001800 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001801
1802 /*
1803 * It's Okay to use CUR/NEXT here since all the blanks are on
1804 * the ASCII range.
1805 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001806 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1807 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001808 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001809 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001810 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001811 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001812 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001813 if (*cur == '\n') {
1814 ctxt->input->line++; ctxt->input->col = 1;
1815 }
1816 cur++;
1817 res++;
1818 if (*cur == 0) {
1819 ctxt->input->cur = cur;
1820 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1821 cur = ctxt->input->cur;
1822 }
1823 }
1824 ctxt->input->cur = cur;
1825 } else {
1826 int cur;
1827 do {
1828 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001829 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001830 NEXT;
1831 cur = CUR;
1832 res++;
1833 }
1834 while ((cur == 0) && (ctxt->inputNr > 1) &&
1835 (ctxt->instate != XML_PARSER_COMMENT)) {
1836 xmlPopInput(ctxt);
1837 cur = CUR;
1838 }
1839 /*
1840 * Need to handle support of entities branching here
1841 */
1842 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1843 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1844 }
Owen Taylor3473f882001-02-23 17:55:21 +00001845 return(res);
1846}
1847
1848/************************************************************************
1849 * *
1850 * Commodity functions to handle entities *
1851 * *
1852 ************************************************************************/
1853
1854/**
1855 * xmlPopInput:
1856 * @ctxt: an XML parser context
1857 *
1858 * xmlPopInput: the current input pointed by ctxt->input came to an end
1859 * pop it and return the next char.
1860 *
1861 * Returns the current xmlChar in the parser context
1862 */
1863xmlChar
1864xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001865 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001866 if (xmlParserDebugEntities)
1867 xmlGenericError(xmlGenericErrorContext,
1868 "Popping input %d\n", ctxt->inputNr);
1869 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001870 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001871 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1872 return(xmlPopInput(ctxt));
1873 return(CUR);
1874}
1875
1876/**
1877 * xmlPushInput:
1878 * @ctxt: an XML parser context
1879 * @input: an XML parser input fragment (entity, XML fragment ...).
1880 *
1881 * xmlPushInput: switch to a new input stream which is stacked on top
1882 * of the previous one(s).
1883 */
1884void
1885xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1886 if (input == NULL) return;
1887
1888 if (xmlParserDebugEntities) {
1889 if ((ctxt->input != NULL) && (ctxt->input->filename))
1890 xmlGenericError(xmlGenericErrorContext,
1891 "%s(%d): ", ctxt->input->filename,
1892 ctxt->input->line);
1893 xmlGenericError(xmlGenericErrorContext,
1894 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1895 }
1896 inputPush(ctxt, input);
1897 GROW;
1898}
1899
1900/**
1901 * xmlParseCharRef:
1902 * @ctxt: an XML parser context
1903 *
1904 * parse Reference declarations
1905 *
1906 * [66] CharRef ::= '&#' [0-9]+ ';' |
1907 * '&#x' [0-9a-fA-F]+ ';'
1908 *
1909 * [ WFC: Legal Character ]
1910 * Characters referred to using character references must match the
1911 * production for Char.
1912 *
1913 * Returns the value parsed (as an int), 0 in case of error
1914 */
1915int
1916xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001917 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001918 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001919 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001920
Owen Taylor3473f882001-02-23 17:55:21 +00001921 /*
1922 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1923 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001924 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001925 (NXT(2) == 'x')) {
1926 SKIP(3);
1927 GROW;
1928 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001929 if (count++ > 20) {
1930 count = 0;
1931 GROW;
1932 }
1933 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001934 val = val * 16 + (CUR - '0');
1935 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1936 val = val * 16 + (CUR - 'a') + 10;
1937 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1938 val = val * 16 + (CUR - 'A') + 10;
1939 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001940 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001941 val = 0;
1942 break;
1943 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001944 if (val > 0x10FFFF)
1945 outofrange = val;
1946
Owen Taylor3473f882001-02-23 17:55:21 +00001947 NEXT;
1948 count++;
1949 }
1950 if (RAW == ';') {
1951 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001952 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001953 ctxt->nbChars ++;
1954 ctxt->input->cur++;
1955 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001956 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001957 SKIP(2);
1958 GROW;
1959 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001960 if (count++ > 20) {
1961 count = 0;
1962 GROW;
1963 }
1964 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001965 val = val * 10 + (CUR - '0');
1966 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001967 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001968 val = 0;
1969 break;
1970 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001971 if (val > 0x10FFFF)
1972 outofrange = val;
1973
Owen Taylor3473f882001-02-23 17:55:21 +00001974 NEXT;
1975 count++;
1976 }
1977 if (RAW == ';') {
1978 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001979 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001980 ctxt->nbChars ++;
1981 ctxt->input->cur++;
1982 }
1983 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001984 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001985 }
1986
1987 /*
1988 * [ WFC: Legal Character ]
1989 * Characters referred to using character references must match the
1990 * production for Char.
1991 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001992 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return(val);
1994 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001995 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1996 "xmlParseCharRef: invalid xmlChar value %d\n",
1997 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 }
1999 return(0);
2000}
2001
2002/**
2003 * xmlParseStringCharRef:
2004 * @ctxt: an XML parser context
2005 * @str: a pointer to an index in the string
2006 *
2007 * parse Reference declarations, variant parsing from a string rather
2008 * than an an input flow.
2009 *
2010 * [66] CharRef ::= '&#' [0-9]+ ';' |
2011 * '&#x' [0-9a-fA-F]+ ';'
2012 *
2013 * [ WFC: Legal Character ]
2014 * Characters referred to using character references must match the
2015 * production for Char.
2016 *
2017 * Returns the value parsed (as an int), 0 in case of error, str will be
2018 * updated to the current value of the index
2019 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002020static int
Owen Taylor3473f882001-02-23 17:55:21 +00002021xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2022 const xmlChar *ptr;
2023 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002024 unsigned int val = 0;
2025 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002026
2027 if ((str == NULL) || (*str == NULL)) return(0);
2028 ptr = *str;
2029 cur = *ptr;
2030 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2031 ptr += 3;
2032 cur = *ptr;
2033 while (cur != ';') { /* Non input consuming loop */
2034 if ((cur >= '0') && (cur <= '9'))
2035 val = val * 16 + (cur - '0');
2036 else if ((cur >= 'a') && (cur <= 'f'))
2037 val = val * 16 + (cur - 'a') + 10;
2038 else if ((cur >= 'A') && (cur <= 'F'))
2039 val = val * 16 + (cur - 'A') + 10;
2040 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002041 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002042 val = 0;
2043 break;
2044 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002045 if (val > 0x10FFFF)
2046 outofrange = val;
2047
Owen Taylor3473f882001-02-23 17:55:21 +00002048 ptr++;
2049 cur = *ptr;
2050 }
2051 if (cur == ';')
2052 ptr++;
2053 } else if ((cur == '&') && (ptr[1] == '#')){
2054 ptr += 2;
2055 cur = *ptr;
2056 while (cur != ';') { /* Non input consuming loops */
2057 if ((cur >= '0') && (cur <= '9'))
2058 val = val * 10 + (cur - '0');
2059 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002060 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002061 val = 0;
2062 break;
2063 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002064 if (val > 0x10FFFF)
2065 outofrange = val;
2066
Owen Taylor3473f882001-02-23 17:55:21 +00002067 ptr++;
2068 cur = *ptr;
2069 }
2070 if (cur == ';')
2071 ptr++;
2072 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002073 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002074 return(0);
2075 }
2076 *str = ptr;
2077
2078 /*
2079 * [ WFC: Legal Character ]
2080 * Characters referred to using character references must match the
2081 * production for Char.
2082 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002083 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002084 return(val);
2085 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002086 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2087 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2088 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002089 }
2090 return(0);
2091}
2092
2093/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002094 * xmlNewBlanksWrapperInputStream:
2095 * @ctxt: an XML parser context
2096 * @entity: an Entity pointer
2097 *
2098 * Create a new input stream for wrapping
2099 * blanks around a PEReference
2100 *
2101 * Returns the new input stream or NULL
2102 */
2103
2104static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2105
Daniel Veillardf4862f02002-09-10 11:13:43 +00002106static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002107xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2108 xmlParserInputPtr input;
2109 xmlChar *buffer;
2110 size_t length;
2111 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002112 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2113 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002114 return(NULL);
2115 }
2116 if (xmlParserDebugEntities)
2117 xmlGenericError(xmlGenericErrorContext,
2118 "new blanks wrapper for entity: %s\n", entity->name);
2119 input = xmlNewInputStream(ctxt);
2120 if (input == NULL) {
2121 return(NULL);
2122 }
2123 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002124 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002125 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002126 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002127 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002128 return(NULL);
2129 }
2130 buffer [0] = ' ';
2131 buffer [1] = '%';
2132 buffer [length-3] = ';';
2133 buffer [length-2] = ' ';
2134 buffer [length-1] = 0;
2135 memcpy(buffer + 2, entity->name, length - 5);
2136 input->free = deallocblankswrapper;
2137 input->base = buffer;
2138 input->cur = buffer;
2139 input->length = length;
2140 input->end = &buffer[length];
2141 return(input);
2142}
2143
2144/**
Owen Taylor3473f882001-02-23 17:55:21 +00002145 * xmlParserHandlePEReference:
2146 * @ctxt: the parser context
2147 *
2148 * [69] PEReference ::= '%' Name ';'
2149 *
2150 * [ WFC: No Recursion ]
2151 * A parsed entity must not contain a recursive
2152 * reference to itself, either directly or indirectly.
2153 *
2154 * [ WFC: Entity Declared ]
2155 * In a document without any DTD, a document with only an internal DTD
2156 * subset which contains no parameter entity references, or a document
2157 * with "standalone='yes'", ... ... The declaration of a parameter
2158 * entity must precede any reference to it...
2159 *
2160 * [ VC: Entity Declared ]
2161 * In a document with an external subset or external parameter entities
2162 * with "standalone='no'", ... ... The declaration of a parameter entity
2163 * must precede any reference to it...
2164 *
2165 * [ WFC: In DTD ]
2166 * Parameter-entity references may only appear in the DTD.
2167 * NOTE: misleading but this is handled.
2168 *
2169 * A PEReference may have been detected in the current input stream
2170 * the handling is done accordingly to
2171 * http://www.w3.org/TR/REC-xml#entproc
2172 * i.e.
2173 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002174 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002175 */
2176void
2177xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002178 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002179 xmlEntityPtr entity = NULL;
2180 xmlParserInputPtr input;
2181
Owen Taylor3473f882001-02-23 17:55:21 +00002182 if (RAW != '%') return;
2183 switch(ctxt->instate) {
2184 case XML_PARSER_CDATA_SECTION:
2185 return;
2186 case XML_PARSER_COMMENT:
2187 return;
2188 case XML_PARSER_START_TAG:
2189 return;
2190 case XML_PARSER_END_TAG:
2191 return;
2192 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002193 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002194 return;
2195 case XML_PARSER_PROLOG:
2196 case XML_PARSER_START:
2197 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002198 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002199 return;
2200 case XML_PARSER_ENTITY_DECL:
2201 case XML_PARSER_CONTENT:
2202 case XML_PARSER_ATTRIBUTE_VALUE:
2203 case XML_PARSER_PI:
2204 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002205 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002206 /* we just ignore it there */
2207 return;
2208 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002209 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 return;
2211 case XML_PARSER_ENTITY_VALUE:
2212 /*
2213 * NOTE: in the case of entity values, we don't do the
2214 * substitution here since we need the literal
2215 * entity value to be able to save the internal
2216 * subset of the document.
2217 * This will be handled by xmlStringDecodeEntities
2218 */
2219 return;
2220 case XML_PARSER_DTD:
2221 /*
2222 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2223 * In the internal DTD subset, parameter-entity references
2224 * can occur only where markup declarations can occur, not
2225 * within markup declarations.
2226 * In that case this is handled in xmlParseMarkupDecl
2227 */
2228 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2229 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002230 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002231 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002232 break;
2233 case XML_PARSER_IGNORE:
2234 return;
2235 }
2236
2237 NEXT;
2238 name = xmlParseName(ctxt);
2239 if (xmlParserDebugEntities)
2240 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002241 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002243 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002244 } else {
2245 if (RAW == ';') {
2246 NEXT;
2247 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2248 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2249 if (entity == NULL) {
2250
2251 /*
2252 * [ WFC: Entity Declared ]
2253 * In a document without any DTD, a document with only an
2254 * internal DTD subset which contains no parameter entity
2255 * references, or a document with "standalone='yes'", ...
2256 * ... The declaration of a parameter entity must precede
2257 * any reference to it...
2258 */
2259 if ((ctxt->standalone == 1) ||
2260 ((ctxt->hasExternalSubset == 0) &&
2261 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002262 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002263 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002264 } else {
2265 /*
2266 * [ VC: Entity Declared ]
2267 * In a document with an external subset or external
2268 * parameter entities with "standalone='no'", ...
2269 * ... The declaration of a parameter entity must precede
2270 * any reference to it...
2271 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002272 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2273 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2274 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002275 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002276 } else
2277 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2278 "PEReference: %%%s; not found\n",
2279 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002280 ctxt->valid = 0;
2281 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002282 } else if (ctxt->input->free != deallocblankswrapper) {
2283 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2284 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002285 } else {
2286 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2287 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002288 xmlChar start[4];
2289 xmlCharEncoding enc;
2290
Owen Taylor3473f882001-02-23 17:55:21 +00002291 /*
2292 * handle the extra spaces added before and after
2293 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002294 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002295 */
2296 input = xmlNewEntityInputStream(ctxt, entity);
2297 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002298
2299 /*
2300 * Get the 4 first bytes and decode the charset
2301 * if enc != XML_CHAR_ENCODING_NONE
2302 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002303 * Note that, since we may have some non-UTF8
2304 * encoding (like UTF16, bug 135229), the 'length'
2305 * is not known, but we can calculate based upon
2306 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002307 */
2308 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002309 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002310 start[0] = RAW;
2311 start[1] = NXT(1);
2312 start[2] = NXT(2);
2313 start[3] = NXT(3);
2314 enc = xmlDetectCharEncoding(start, 4);
2315 if (enc != XML_CHAR_ENCODING_NONE) {
2316 xmlSwitchEncoding(ctxt, enc);
2317 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002318 }
2319
Owen Taylor3473f882001-02-23 17:55:21 +00002320 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002321 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2322 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002323 xmlParseTextDecl(ctxt);
2324 }
Owen Taylor3473f882001-02-23 17:55:21 +00002325 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002326 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2327 "PEReference: %s is not a parameter entity\n",
2328 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002329 }
2330 }
2331 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002332 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002333 }
Owen Taylor3473f882001-02-23 17:55:21 +00002334 }
2335}
2336
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable to enlarge
 *
 * Macro used to grow the current buffer: its capacity is doubled.
 *
 * The expansion site must provide:
 *   - an integer variable named <buffer>_size holding the capacity,
 *   - a label named mem_error, which is jumped to when the reallocation
 *     fails.
 * On failure @buffer itself is left untouched (it still has to be freed
 * by the mem_error handler), but <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
									\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL)							\
        goto mem_error;							\
    buffer = tmp;							\
}
2348
2349/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002350 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002351 * @ctxt: the parser context
2352 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002353 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002354 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2355 * @end: an end marker xmlChar, 0 if none
2356 * @end2: an end marker xmlChar, 0 if none
2357 * @end3: an end marker xmlChar, 0 if none
2358 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002359 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002360 *
2361 * [67] Reference ::= EntityRef | CharRef
2362 *
2363 * [69] PEReference ::= '%' Name ';'
2364 *
2365 * Returns A newly allocated string with the substitution done. The caller
2366 * must deallocate it !
2367 */
2368xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002369xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2370 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002371 xmlChar *buffer = NULL;
2372 int buffer_size = 0;
2373
2374 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002375 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002376 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002377 xmlEntityPtr ent;
2378 int c,l;
2379 int nbchars = 0;
2380
Daniel Veillarda82b1822004-11-08 16:24:57 +00002381 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002382 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002383 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002384
Daniel Veillard8915c152008-08-26 13:05:34 +00002385 if (((ctxt->depth > 20) || (ctxt->nbentities >= 100000)) &&
2386 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002387 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002388 return(NULL);
2389 }
2390
2391 /*
2392 * allocate a translation buffer.
2393 */
2394 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002395 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002396 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002397
2398 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002399 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002400 * we are operating on already parsed values.
2401 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002402 if (str < last)
2403 c = CUR_SCHAR(str, l);
2404 else
2405 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002406 while ((c != 0) && (c != end) && /* non input consuming loop */
2407 (c != end2) && (c != end3)) {
2408
2409 if (c == 0) break;
2410 if ((c == '&') && (str[1] == '#')) {
2411 int val = xmlParseStringCharRef(ctxt, &str);
2412 if (val != 0) {
2413 COPY_BUF(0,buffer,nbchars,val);
2414 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002415 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2416 growBuffer(buffer);
2417 }
Owen Taylor3473f882001-02-23 17:55:21 +00002418 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "String decoding Entity Reference: %.30s\n",
2422 str);
2423 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002424 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2425 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002426 goto int_error;
2427 ctxt->nbentities++;
2428 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002429 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002430 if ((ent != NULL) &&
2431 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2432 if (ent->content != NULL) {
2433 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002434 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2435 growBuffer(buffer);
2436 }
Owen Taylor3473f882001-02-23 17:55:21 +00002437 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002438 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2439 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002440 }
2441 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002442 ctxt->depth++;
2443 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2444 0, 0, 0);
2445 ctxt->depth--;
2446 if (rep != NULL) {
2447 current = rep;
2448 while (*current != 0) { /* non input consuming loop */
2449 buffer[nbchars++] = *current++;
2450 if (nbchars >
2451 buffer_size - XML_PARSER_BUFFER_SIZE) {
2452 growBuffer(buffer);
2453 }
2454 }
2455 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002456 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002457 }
2458 } else if (ent != NULL) {
2459 int i = xmlStrlen(ent->name);
2460 const xmlChar *cur = ent->name;
2461
2462 buffer[nbchars++] = '&';
2463 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2464 growBuffer(buffer);
2465 }
2466 for (;i > 0;i--)
2467 buffer[nbchars++] = *cur++;
2468 buffer[nbchars++] = ';';
2469 }
2470 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2471 if (xmlParserDebugEntities)
2472 xmlGenericError(xmlGenericErrorContext,
2473 "String decoding PE Reference: %.30s\n", str);
2474 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002475 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2476 goto int_error;
2477 ctxt->nbentities++;
2478 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002479 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002480 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002481 if (ent->content == NULL) {
2482 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2483 }
2484 }
Owen Taylor3473f882001-02-23 17:55:21 +00002485 ctxt->depth++;
2486 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2487 0, 0, 0);
2488 ctxt->depth--;
2489 if (rep != NULL) {
2490 current = rep;
2491 while (*current != 0) { /* non input consuming loop */
2492 buffer[nbchars++] = *current++;
2493 if (nbchars >
2494 buffer_size - XML_PARSER_BUFFER_SIZE) {
2495 growBuffer(buffer);
2496 }
2497 }
2498 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002499 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002500 }
2501 }
2502 } else {
2503 COPY_BUF(l,buffer,nbchars,c);
2504 str += l;
2505 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2506 growBuffer(buffer);
2507 }
2508 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002509 if (str < last)
2510 c = CUR_SCHAR(str, l);
2511 else
2512 c = 0;
Daniel Veillard8915c152008-08-26 13:05:34 +00002513 if ((nbchars > 100000) &&
2514 (ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
2515 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2516 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
2517 "Excessive lenght of attribute: %d use XML_PARSE_HUGE option\n",
2518 nbchars);
2519 goto int_error;
2520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 }
2522 buffer[nbchars++] = 0;
2523 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002524
2525mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002526 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002527int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002528 if (rep != NULL)
2529 xmlFree(rep);
2530 if (buffer != NULL)
2531 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002532 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002533}
2534
Daniel Veillarde57ec792003-09-10 10:50:59 +00002535/**
2536 * xmlStringDecodeEntities:
2537 * @ctxt: the parser context
2538 * @str: the input string
2539 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2540 * @end: an end marker xmlChar, 0 if none
2541 * @end2: an end marker xmlChar, 0 if none
2542 * @end3: an end marker xmlChar, 0 if none
2543 *
2544 * Takes a entity string content and process to do the adequate substitutions.
2545 *
2546 * [67] Reference ::= EntityRef | CharRef
2547 *
2548 * [69] PEReference ::= '%' Name ';'
2549 *
2550 * Returns A newly allocated string with the substitution done. The caller
2551 * must deallocate it !
2552 */
2553xmlChar *
2554xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2555 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002556 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2558 end, end2, end3));
2559}
Owen Taylor3473f882001-02-23 17:55:21 +00002560
2561/************************************************************************
2562 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002563 * Commodity functions, cleanup needed ? *
2564 * *
2565 ************************************************************************/
2566
2567/**
2568 * areBlanks:
2569 * @ctxt: an XML parser context
2570 * @str: a xmlChar *
2571 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002572 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002573 *
2574 * Is this a sequence of blank chars that one can ignore ?
2575 *
2576 * Returns 1 if ignorable 0 otherwise.
2577 */
2578
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002579static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2580 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002581 int i, ret;
2582 xmlNodePtr lastChild;
2583
Daniel Veillard05c13a22001-09-09 08:38:09 +00002584 /*
2585 * Don't spend time trying to differentiate them, the same callback is
2586 * used !
2587 */
2588 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002589 return(0);
2590
Owen Taylor3473f882001-02-23 17:55:21 +00002591 /*
2592 * Check for xml:space value.
2593 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002594 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2595 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002596 return(0);
2597
2598 /*
2599 * Check that the string is made of blanks
2600 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002601 if (blank_chars == 0) {
2602 for (i = 0;i < len;i++)
2603 if (!(IS_BLANK_CH(str[i]))) return(0);
2604 }
Owen Taylor3473f882001-02-23 17:55:21 +00002605
2606 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002607 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002608 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002609 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 if (ctxt->myDoc != NULL) {
2611 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2612 if (ret == 0) return(1);
2613 if (ret == 1) return(0);
2614 }
2615
2616 /*
2617 * Otherwise, heuristic :-\
2618 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002619 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002620 if ((ctxt->node->children == NULL) &&
2621 (RAW == '<') && (NXT(1) == '/')) return(0);
2622
2623 lastChild = xmlGetLastChild(ctxt->node);
2624 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002625 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2626 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002627 } else if (xmlNodeIsText(lastChild))
2628 return(0);
2629 else if ((ctxt->node->children != NULL) &&
2630 (xmlNodeIsText(ctxt->node->children)))
2631 return(0);
2632 return(1);
2633}
2634
Owen Taylor3473f882001-02-23 17:55:21 +00002635/************************************************************************
2636 * *
2637 * Extra stuff for namespace support *
2638 * Relates to http://www.w3.org/TR/WD-xml-names *
2639 * *
2640 ************************************************************************/
2641
2642/**
2643 * xmlSplitQName:
2644 * @ctxt: an XML parser context
2645 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002646 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002647 *
2648 * parse an UTF8 encoded XML qualified name string
2649 *
2650 * [NS 5] QName ::= (Prefix ':')? LocalPart
2651 *
2652 * [NS 6] Prefix ::= NCName
2653 *
2654 * [NS 7] LocalPart ::= NCName
2655 *
2656 * Returns the local part, and prefix is updated
2657 * to get the Prefix if any.
2658 */
2659
2660xmlChar *
2661xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2662 xmlChar buf[XML_MAX_NAMELEN + 5];
2663 xmlChar *buffer = NULL;
2664 int len = 0;
2665 int max = XML_MAX_NAMELEN;
2666 xmlChar *ret = NULL;
2667 const xmlChar *cur = name;
2668 int c;
2669
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002670 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002671 *prefix = NULL;
2672
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002673 if (cur == NULL) return(NULL);
2674
Owen Taylor3473f882001-02-23 17:55:21 +00002675#ifndef XML_XML_NAMESPACE
2676 /* xml: prefix is not really a namespace */
2677 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2678 (cur[2] == 'l') && (cur[3] == ':'))
2679 return(xmlStrdup(name));
2680#endif
2681
Daniel Veillard597bc482003-07-24 16:08:28 +00002682 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002683 if (cur[0] == ':')
2684 return(xmlStrdup(name));
2685
2686 c = *cur++;
2687 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2688 buf[len++] = c;
2689 c = *cur++;
2690 }
2691 if (len >= max) {
2692 /*
2693 * Okay someone managed to make a huge name, so he's ready to pay
2694 * for the processing speed.
2695 */
2696 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002697
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002698 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002699 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002700 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002701 return(NULL);
2702 }
2703 memcpy(buffer, buf, len);
2704 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2705 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002706 xmlChar *tmp;
2707
Owen Taylor3473f882001-02-23 17:55:21 +00002708 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002709 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002710 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002711 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002712 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002713 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002714 return(NULL);
2715 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002716 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002717 }
2718 buffer[len++] = c;
2719 c = *cur++;
2720 }
2721 buffer[len] = 0;
2722 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002723
Daniel Veillard597bc482003-07-24 16:08:28 +00002724 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002725 if (buffer != NULL)
2726 xmlFree(buffer);
2727 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002728 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002729 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002730
Owen Taylor3473f882001-02-23 17:55:21 +00002731 if (buffer == NULL)
2732 ret = xmlStrndup(buf, len);
2733 else {
2734 ret = buffer;
2735 buffer = NULL;
2736 max = XML_MAX_NAMELEN;
2737 }
2738
2739
2740 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002741 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002742 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002743 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002744 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002745 }
Owen Taylor3473f882001-02-23 17:55:21 +00002746 len = 0;
2747
Daniel Veillardbb284f42002-10-16 18:02:47 +00002748 /*
2749 * Check that the first character is proper to start
2750 * a new name
2751 */
2752 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2753 ((c >= 0x41) && (c <= 0x5A)) ||
2754 (c == '_') || (c == ':'))) {
2755 int l;
2756 int first = CUR_SCHAR(cur, l);
2757
2758 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002759 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002760 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002761 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002762 }
2763 }
2764 cur++;
2765
Owen Taylor3473f882001-02-23 17:55:21 +00002766 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2767 buf[len++] = c;
2768 c = *cur++;
2769 }
2770 if (len >= max) {
2771 /*
2772 * Okay someone managed to make a huge name, so he's ready to pay
2773 * for the processing speed.
2774 */
2775 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002776
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002777 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002778 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002779 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002780 return(NULL);
2781 }
2782 memcpy(buffer, buf, len);
2783 while (c != 0) { /* tested bigname2.xml */
2784 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002785 xmlChar *tmp;
2786
Owen Taylor3473f882001-02-23 17:55:21 +00002787 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002789 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002790 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002791 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002792 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002793 return(NULL);
2794 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002795 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002796 }
2797 buffer[len++] = c;
2798 c = *cur++;
2799 }
2800 buffer[len] = 0;
2801 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (buffer == NULL)
2804 ret = xmlStrndup(buf, len);
2805 else {
2806 ret = buffer;
2807 }
2808 }
2809
2810 return(ret);
2811}
2812
2813/************************************************************************
2814 * *
2815 * The parser itself *
2816 * Relates to http://www.w3.org/TR/REC-xml *
2817 * *
2818 ************************************************************************/
2819
Daniel Veillard34e3f642008-07-29 09:02:27 +00002820/************************************************************************
2821 * *
2822 * Routines to parse Name, NCName and NmToken *
2823 * *
2824 ************************************************************************/
/*
 * Call counters for the name parsing routines. Each routine visible in
 * this part of the file bumps its own counter on entry; nothing here
 * reads them back, so they look like profiling/debugging aids.
 */
unsigned long nbParseName = 0;          /* calls to xmlParseName */
unsigned long nbParseNmToken = 0;       /* presumably Nmtoken parsing */
unsigned long nbParseNCName = 0;        /* calls to xmlParseNCName */
unsigned long nbParseNCNameComplex = 0; /* calls to xmlParseNCNameComplex */
unsigned long nbParseNameComplex = 0;   /* calls to xmlParseNameComplex */
unsigned long nbParseStringName = 0;    /* presumably string Name parsing */
2831/*
2832 * The two following functions are related to the change of accepted
2833 * characters for Name and NmToken in the Revision 5 of XML-1.0
2834 * They correspond to the modified production [4] and the new production [4a]
2835 * changes in that revision. Also note that the macros used for the
2836 * productions Letter, Digit, CombiningChar and Extender are not needed
2837 * anymore.
2838 * We still keep compatibility to pre-revision5 parsing semantic if the
2839 * new XML_PARSE_OLD10 option is given to the parser.
2840 */
2841static int
2842xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2843 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2844 /*
2845 * Use the new checks of production [4] [4a] amd [5] of the
2846 * Update 5 of XML-1.0
2847 */
2848 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2849 (((c >= 'a') && (c <= 'z')) ||
2850 ((c >= 'A') && (c <= 'Z')) ||
2851 (c == '_') || (c == ':') ||
2852 ((c >= 0xC0) && (c <= 0xD6)) ||
2853 ((c >= 0xD8) && (c <= 0xF6)) ||
2854 ((c >= 0xF8) && (c <= 0x2FF)) ||
2855 ((c >= 0x370) && (c <= 0x37D)) ||
2856 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2857 ((c >= 0x200C) && (c <= 0x200D)) ||
2858 ((c >= 0x2070) && (c <= 0x218F)) ||
2859 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2860 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2861 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2862 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2863 ((c >= 0x10000) && (c <= 0xEFFFF))))
2864 return(1);
2865 } else {
2866 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2867 return(1);
2868 }
2869 return(0);
2870}
2871
2872static int
2873xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2874 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2875 /*
2876 * Use the new checks of production [4] [4a] amd [5] of the
2877 * Update 5 of XML-1.0
2878 */
2879 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2880 (((c >= 'a') && (c <= 'z')) ||
2881 ((c >= 'A') && (c <= 'Z')) ||
2882 ((c >= '0') && (c <= '9')) || /* !start */
2883 (c == '_') || (c == ':') ||
2884 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2885 ((c >= 0xC0) && (c <= 0xD6)) ||
2886 ((c >= 0xD8) && (c <= 0xF6)) ||
2887 ((c >= 0xF8) && (c <= 0x2FF)) ||
2888 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2889 ((c >= 0x370) && (c <= 0x37D)) ||
2890 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2891 ((c >= 0x200C) && (c <= 0x200D)) ||
2892 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2893 ((c >= 0x2070) && (c <= 0x218F)) ||
2894 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2895 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2896 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2897 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2898 ((c >= 0x10000) && (c <= 0xEFFFF))))
2899 return(1);
2900 } else {
2901 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2902 (c == '.') || (c == '-') ||
2903 (c == '_') || (c == ':') ||
2904 (IS_COMBINING(c)) ||
2905 (IS_EXTENDER(c)))
2906 return(1);
2907 }
2908 return(0);
2909}
2910
Daniel Veillarde57ec792003-09-10 10:50:59 +00002911static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002912 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002913
Daniel Veillard34e3f642008-07-29 09:02:27 +00002914static const xmlChar *
2915xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2916 int len = 0, l;
2917 int c;
2918 int count = 0;
2919
2920 nbParseNameComplex++;
2921
2922 /*
2923 * Handler for more complex cases
2924 */
2925 GROW;
2926 c = CUR_CHAR(l);
2927 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2928 /*
2929 * Use the new checks of production [4] [4a] amd [5] of the
2930 * Update 5 of XML-1.0
2931 */
2932 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2933 (!(((c >= 'a') && (c <= 'z')) ||
2934 ((c >= 'A') && (c <= 'Z')) ||
2935 (c == '_') || (c == ':') ||
2936 ((c >= 0xC0) && (c <= 0xD6)) ||
2937 ((c >= 0xD8) && (c <= 0xF6)) ||
2938 ((c >= 0xF8) && (c <= 0x2FF)) ||
2939 ((c >= 0x370) && (c <= 0x37D)) ||
2940 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2941 ((c >= 0x200C) && (c <= 0x200D)) ||
2942 ((c >= 0x2070) && (c <= 0x218F)) ||
2943 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2944 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2945 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2946 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2947 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
2948 return(NULL);
2949 }
2950 len += l;
2951 NEXTL(l);
2952 c = CUR_CHAR(l);
2953 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2954 (((c >= 'a') && (c <= 'z')) ||
2955 ((c >= 'A') && (c <= 'Z')) ||
2956 ((c >= '0') && (c <= '9')) || /* !start */
2957 (c == '_') || (c == ':') ||
2958 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2959 ((c >= 0xC0) && (c <= 0xD6)) ||
2960 ((c >= 0xD8) && (c <= 0xF6)) ||
2961 ((c >= 0xF8) && (c <= 0x2FF)) ||
2962 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2963 ((c >= 0x370) && (c <= 0x37D)) ||
2964 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2965 ((c >= 0x200C) && (c <= 0x200D)) ||
2966 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2967 ((c >= 0x2070) && (c <= 0x218F)) ||
2968 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2969 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2970 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2971 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2972 ((c >= 0x10000) && (c <= 0xEFFFF))
2973 )) {
2974 if (count++ > 100) {
2975 count = 0;
2976 GROW;
2977 }
2978 len += l;
2979 NEXTL(l);
2980 c = CUR_CHAR(l);
2981 }
2982 } else {
2983 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2984 (!IS_LETTER(c) && (c != '_') &&
2985 (c != ':'))) {
2986 return(NULL);
2987 }
2988 len += l;
2989 NEXTL(l);
2990 c = CUR_CHAR(l);
2991
2992 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2993 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2994 (c == '.') || (c == '-') ||
2995 (c == '_') || (c == ':') ||
2996 (IS_COMBINING(c)) ||
2997 (IS_EXTENDER(c)))) {
2998 if (count++ > 100) {
2999 count = 0;
3000 GROW;
3001 }
3002 len += l;
3003 NEXTL(l);
3004 c = CUR_CHAR(l);
3005 }
3006 }
3007 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3008 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3009 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3010}
3011
Owen Taylor3473f882001-02-23 17:55:21 +00003012/**
3013 * xmlParseName:
3014 * @ctxt: an XML parser context
3015 *
3016 * parse an XML name.
3017 *
3018 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3019 * CombiningChar | Extender
3020 *
3021 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3022 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003023 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003024 *
3025 * Returns the Name parsed or NULL
3026 */
3027
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003028const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003029xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003030 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003031 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003032 int count = 0;
3033
3034 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003035
Daniel Veillard34e3f642008-07-29 09:02:27 +00003036 nbParseName++;
3037
Daniel Veillard48b2f892001-02-25 16:11:03 +00003038 /*
3039 * Accelerator for simple ASCII names
3040 */
3041 in = ctxt->input->cur;
3042 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3043 ((*in >= 0x41) && (*in <= 0x5A)) ||
3044 (*in == '_') || (*in == ':')) {
3045 in++;
3046 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3047 ((*in >= 0x41) && (*in <= 0x5A)) ||
3048 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003049 (*in == '_') || (*in == '-') ||
3050 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003051 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003052 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003053 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003054 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003055 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003056 ctxt->nbChars += count;
3057 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003058 if (ret == NULL)
3059 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003060 return(ret);
3061 }
3062 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003063 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003064 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003065}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003066
Daniel Veillard34e3f642008-07-29 09:02:27 +00003067static const xmlChar *
3068xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3069 int len = 0, l;
3070 int c;
3071 int count = 0;
3072
3073 nbParseNCNameComplex++;
3074
3075 /*
3076 * Handler for more complex cases
3077 */
3078 GROW;
3079 c = CUR_CHAR(l);
3080 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3081 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3082 return(NULL);
3083 }
3084
3085 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3086 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3087 if (count++ > 100) {
3088 count = 0;
3089 GROW;
3090 }
3091 len += l;
3092 NEXTL(l);
3093 c = CUR_CHAR(l);
3094 }
3095 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3096}
3097
3098/**
3099 * xmlParseNCName:
3100 * @ctxt: an XML parser context
3101 * @len: lenght of the string parsed
3102 *
3103 * parse an XML name.
3104 *
3105 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3106 * CombiningChar | Extender
3107 *
3108 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3109 *
3110 * Returns the Name parsed or NULL
3111 */
3112
3113static const xmlChar *
3114xmlParseNCName(xmlParserCtxtPtr ctxt) {
3115 const xmlChar *in;
3116 const xmlChar *ret;
3117 int count = 0;
3118
3119 nbParseNCName++;
3120
3121 /*
3122 * Accelerator for simple ASCII names
3123 */
3124 in = ctxt->input->cur;
3125 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3126 ((*in >= 0x41) && (*in <= 0x5A)) ||
3127 (*in == '_')) {
3128 in++;
3129 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3130 ((*in >= 0x41) && (*in <= 0x5A)) ||
3131 ((*in >= 0x30) && (*in <= 0x39)) ||
3132 (*in == '_') || (*in == '-') ||
3133 (*in == '.'))
3134 in++;
3135 if ((*in > 0) && (*in < 0x80)) {
3136 count = in - ctxt->input->cur;
3137 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3138 ctxt->input->cur = in;
3139 ctxt->nbChars += count;
3140 ctxt->input->col += count;
3141 if (ret == NULL) {
3142 xmlErrMemory(ctxt, NULL);
3143 }
3144 return(ret);
3145 }
3146 }
3147 return(xmlParseNCNameComplex(ctxt));
3148}
3149
Daniel Veillard46de64e2002-05-29 08:21:33 +00003150/**
3151 * xmlParseNameAndCompare:
3152 * @ctxt: an XML parser context
3153 *
3154 * parse an XML name and compares for match
3155 * (specialized for endtag parsing)
3156 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003157 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3158 * and the name for mismatch
3159 */
3160
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003161static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003162xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003163 register const xmlChar *cmp = other;
3164 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003165 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003166
3167 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003168
Daniel Veillard46de64e2002-05-29 08:21:33 +00003169 in = ctxt->input->cur;
3170 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003172 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003173 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003174 }
William M. Brack76e95df2003-10-18 16:20:14 +00003175 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003176 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003177 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003178 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003179 }
3180 /* failure (or end of input buffer), check with full function */
3181 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003182 /* strings coming from the dictionnary direct compare possible */
3183 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003184 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003185 }
3186 return ret;
3187}
3188
Owen Taylor3473f882001-02-23 17:55:21 +00003189/**
3190 * xmlParseStringName:
3191 * @ctxt: an XML parser context
3192 * @str: a pointer to the string pointer (IN/OUT)
3193 *
3194 * parse an XML name.
3195 *
3196 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3197 * CombiningChar | Extender
3198 *
3199 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3200 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003201 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003202 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003203 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003204 * is updated to the current location in the string.
3205 */
3206
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003207static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003208xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3209 xmlChar buf[XML_MAX_NAMELEN + 5];
3210 const xmlChar *cur = *str;
3211 int len = 0, l;
3212 int c;
3213
Daniel Veillard34e3f642008-07-29 09:02:27 +00003214 nbParseStringName++;
3215
Owen Taylor3473f882001-02-23 17:55:21 +00003216 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003217 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003218 return(NULL);
3219 }
3220
Daniel Veillard34e3f642008-07-29 09:02:27 +00003221 COPY_BUF(l,buf,len,c);
3222 cur += l;
3223 c = CUR_SCHAR(cur, l);
3224 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003225 COPY_BUF(l,buf,len,c);
3226 cur += l;
3227 c = CUR_SCHAR(cur, l);
3228 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3229 /*
3230 * Okay someone managed to make a huge name, so he's ready to pay
3231 * for the processing speed.
3232 */
3233 xmlChar *buffer;
3234 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003235
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003236 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003237 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003238 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003239 return(NULL);
3240 }
3241 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003242 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003243 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003244 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003245 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003246 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003247 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003248 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003249 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003250 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003251 return(NULL);
3252 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003253 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003254 }
3255 COPY_BUF(l,buffer,len,c);
3256 cur += l;
3257 c = CUR_SCHAR(cur, l);
3258 }
3259 buffer[len] = 0;
3260 *str = cur;
3261 return(buffer);
3262 }
3263 }
3264 *str = cur;
3265 return(xmlStrndup(buf, len));
3266}
3267
3268/**
3269 * xmlParseNmtoken:
3270 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003271 *
Owen Taylor3473f882001-02-23 17:55:21 +00003272 * parse an XML Nmtoken.
3273 *
3274 * [7] Nmtoken ::= (NameChar)+
3275 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003276 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003277 *
3278 * Returns the Nmtoken parsed or NULL
3279 */
3280
3281xmlChar *
3282xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3283 xmlChar buf[XML_MAX_NAMELEN + 5];
3284 int len = 0, l;
3285 int c;
3286 int count = 0;
3287
Daniel Veillard34e3f642008-07-29 09:02:27 +00003288 nbParseNmToken++;
3289
Owen Taylor3473f882001-02-23 17:55:21 +00003290 GROW;
3291 c = CUR_CHAR(l);
3292
Daniel Veillard34e3f642008-07-29 09:02:27 +00003293 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003294 if (count++ > 100) {
3295 count = 0;
3296 GROW;
3297 }
3298 COPY_BUF(l,buf,len,c);
3299 NEXTL(l);
3300 c = CUR_CHAR(l);
3301 if (len >= XML_MAX_NAMELEN) {
3302 /*
3303 * Okay someone managed to make a huge token, so he's ready to pay
3304 * for the processing speed.
3305 */
3306 xmlChar *buffer;
3307 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003308
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003309 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003310 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003312 return(NULL);
3313 }
3314 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003315 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003316 if (count++ > 100) {
3317 count = 0;
3318 GROW;
3319 }
3320 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003321 xmlChar *tmp;
3322
Owen Taylor3473f882001-02-23 17:55:21 +00003323 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003324 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003325 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003326 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003327 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003328 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003329 return(NULL);
3330 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003331 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003332 }
3333 COPY_BUF(l,buffer,len,c);
3334 NEXTL(l);
3335 c = CUR_CHAR(l);
3336 }
3337 buffer[len] = 0;
3338 return(buffer);
3339 }
3340 }
3341 if (len == 0)
3342 return(NULL);
3343 return(xmlStrndup(buf, len));
3344}
3345
3346/**
3347 * xmlParseEntityValue:
3348 * @ctxt: an XML parser context
3349 * @orig: if non-NULL store a copy of the original entity value
3350 *
3351 * parse a value for ENTITY declarations
3352 *
3353 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3354 * "'" ([^%&'] | PEReference | Reference)* "'"
3355 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003356 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003357 */
3358
3359xmlChar *
3360xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3361 xmlChar *buf = NULL;
3362 int len = 0;
3363 int size = XML_PARSER_BUFFER_SIZE;
3364 int c, l;
3365 xmlChar stop;
3366 xmlChar *ret = NULL;
3367 const xmlChar *cur = NULL;
3368 xmlParserInputPtr input;
3369
3370 if (RAW == '"') stop = '"';
3371 else if (RAW == '\'') stop = '\'';
3372 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003373 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003374 return(NULL);
3375 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003376 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003377 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003378 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(NULL);
3380 }
3381
3382 /*
3383 * The content of the entity definition is copied in a buffer.
3384 */
3385
3386 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3387 input = ctxt->input;
3388 GROW;
3389 NEXT;
3390 c = CUR_CHAR(l);
3391 /*
3392 * NOTE: 4.4.5 Included in Literal
3393 * When a parameter entity reference appears in a literal entity
3394 * value, ... a single or double quote character in the replacement
3395 * text is always treated as a normal data character and will not
3396 * terminate the literal.
3397 * In practice it means we stop the loop only when back at parsing
3398 * the initial entity and the quote is found
3399 */
William M. Brack871611b2003-10-18 04:53:14 +00003400 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003401 (ctxt->input != input))) {
3402 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003403 xmlChar *tmp;
3404
Owen Taylor3473f882001-02-23 17:55:21 +00003405 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003406 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3407 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003408 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003409 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 return(NULL);
3411 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003412 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003413 }
3414 COPY_BUF(l,buf,len,c);
3415 NEXTL(l);
3416 /*
3417 * Pop-up of finished entities.
3418 */
3419 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3420 xmlPopInput(ctxt);
3421
3422 GROW;
3423 c = CUR_CHAR(l);
3424 if (c == 0) {
3425 GROW;
3426 c = CUR_CHAR(l);
3427 }
3428 }
3429 buf[len] = 0;
3430
3431 /*
3432 * Raise problem w.r.t. '&' and '%' being used in non-entities
3433 * reference constructs. Note Charref will be handled in
3434 * xmlStringDecodeEntities()
3435 */
3436 cur = buf;
3437 while (*cur != 0) { /* non input consuming */
3438 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3439 xmlChar *name;
3440 xmlChar tmp = *cur;
3441
3442 cur++;
3443 name = xmlParseStringName(ctxt, &cur);
3444 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003445 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003446 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003447 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003448 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003449 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3450 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003451 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003452 }
3453 if (name != NULL)
3454 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003455 if (*cur == 0)
3456 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003457 }
3458 cur++;
3459 }
3460
3461 /*
3462 * Then PEReference entities are substituted.
3463 */
3464 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003465 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003466 xmlFree(buf);
3467 } else {
3468 NEXT;
3469 /*
3470 * NOTE: 4.4.7 Bypassed
3471 * When a general entity reference appears in the EntityValue in
3472 * an entity declaration, it is bypassed and left as is.
3473 * so XML_SUBSTITUTE_REF is not set here.
3474 */
3475 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3476 0, 0, 0);
3477 if (orig != NULL)
3478 *orig = buf;
3479 else
3480 xmlFree(buf);
3481 }
3482
3483 return(ret);
3484}
3485
3486/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003487 * xmlParseAttValueComplex:
3488 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003489 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003490 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003491 *
3492 * parse a value for an attribute, this is the fallback function
3493 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003494 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003495 *
3496 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3497 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003498static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003499xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003500 xmlChar limit = 0;
3501 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003502 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003503 int len = 0;
3504 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003505 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003506 xmlChar *current = NULL;
3507 xmlEntityPtr ent;
3508
Owen Taylor3473f882001-02-23 17:55:21 +00003509 if (NXT(0) == '"') {
3510 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3511 limit = '"';
3512 NEXT;
3513 } else if (NXT(0) == '\'') {
3514 limit = '\'';
3515 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3516 NEXT;
3517 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003518 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003519 return(NULL);
3520 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003521
Owen Taylor3473f882001-02-23 17:55:21 +00003522 /*
3523 * allocate a translation buffer.
3524 */
3525 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003526 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003527 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003528
3529 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003530 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003531 */
3532 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003533 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003534 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003535 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003536 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003537 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003538 if (NXT(1) == '#') {
3539 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003540
Owen Taylor3473f882001-02-23 17:55:21 +00003541 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003542 if (ctxt->replaceEntities) {
3543 if (len > buf_size - 10) {
3544 growBuffer(buf);
3545 }
3546 buf[len++] = '&';
3547 } else {
3548 /*
3549 * The reparsing will be done in xmlStringGetNodeList()
3550 * called by the attribute() function in SAX.c
3551 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003552 if (len > buf_size - 10) {
3553 growBuffer(buf);
3554 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003555 buf[len++] = '&';
3556 buf[len++] = '#';
3557 buf[len++] = '3';
3558 buf[len++] = '8';
3559 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003561 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003562 if (len > buf_size - 10) {
3563 growBuffer(buf);
3564 }
Owen Taylor3473f882001-02-23 17:55:21 +00003565 len += xmlCopyChar(0, &buf[len], val);
3566 }
3567 } else {
3568 ent = xmlParseEntityRef(ctxt);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00003569 ctxt->nbentities++;
3570 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00003571 ctxt->nbentities += ent->checked;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003572 if ((ent != NULL) &&
3573 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3574 if (len > buf_size - 10) {
3575 growBuffer(buf);
3576 }
3577 if ((ctxt->replaceEntities == 0) &&
3578 (ent->content[0] == '&')) {
3579 buf[len++] = '&';
3580 buf[len++] = '#';
3581 buf[len++] = '3';
3582 buf[len++] = '8';
3583 buf[len++] = ';';
3584 } else {
3585 buf[len++] = ent->content[0];
3586 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003587 } else if ((ent != NULL) &&
3588 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3590 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003591 XML_SUBSTITUTE_REF,
3592 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003593 if (rep != NULL) {
3594 current = rep;
3595 while (*current != 0) { /* non input consuming */
3596 buf[len++] = *current++;
3597 if (len > buf_size - 10) {
3598 growBuffer(buf);
3599 }
3600 }
3601 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003602 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003603 }
3604 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003605 if (len > buf_size - 10) {
3606 growBuffer(buf);
3607 }
Owen Taylor3473f882001-02-23 17:55:21 +00003608 if (ent->content != NULL)
3609 buf[len++] = ent->content[0];
3610 }
3611 } else if (ent != NULL) {
3612 int i = xmlStrlen(ent->name);
3613 const xmlChar *cur = ent->name;
3614
3615 /*
3616 * This may look absurd but is needed to detect
3617 * entities problems
3618 */
3619 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3620 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003622 XML_SUBSTITUTE_REF, 0, 0, 0);
3623 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003624 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003625 rep = NULL;
3626 }
Owen Taylor3473f882001-02-23 17:55:21 +00003627 }
3628
3629 /*
3630 * Just output the reference
3631 */
3632 buf[len++] = '&';
3633 if (len > buf_size - i - 10) {
3634 growBuffer(buf);
3635 }
3636 for (;i > 0;i--)
3637 buf[len++] = *cur++;
3638 buf[len++] = ';';
3639 }
3640 }
3641 } else {
3642 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003643 if ((len != 0) || (!normalize)) {
3644 if ((!normalize) || (!in_space)) {
3645 COPY_BUF(l,buf,len,0x20);
3646 if (len > buf_size - 10) {
3647 growBuffer(buf);
3648 }
3649 }
3650 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003651 }
3652 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003653 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003654 COPY_BUF(l,buf,len,c);
3655 if (len > buf_size - 10) {
3656 growBuffer(buf);
3657 }
3658 }
3659 NEXTL(l);
3660 }
3661 GROW;
3662 c = CUR_CHAR(l);
Daniel Veillard8915c152008-08-26 13:05:34 +00003663 if ((len > 100000) &&
3664 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3665 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
3666 "Excessive lenght of attribute: %d use XML_PARSE_HUGE option\n",
3667 len);
3668 goto int_error;
3669 }
Owen Taylor3473f882001-02-23 17:55:21 +00003670 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003671 if ((in_space) && (normalize)) {
3672 while (buf[len - 1] == 0x20) len--;
3673 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003674 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003675 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003676 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003677 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003678 if ((c != 0) && (!IS_CHAR(c))) {
3679 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3680 "invalid character in attribute value\n");
3681 } else {
3682 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3683 "AttValue: ' expected\n");
3684 }
Owen Taylor3473f882001-02-23 17:55:21 +00003685 } else
3686 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003687 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003688 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003689
3690mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003691 xmlErrMemory(ctxt, NULL);
Daniel Veillard8915c152008-08-26 13:05:34 +00003692int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00003693 if (buf != NULL)
3694 xmlFree(buf);
3695 if (rep != NULL)
3696 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003697 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003698}
3699
3700/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003701 * xmlParseAttValue:
3702 * @ctxt: an XML parser context
3703 *
3704 * parse a value for an attribute
3705 * Note: the parser won't do substitution of entities here, this
3706 * will be handled later in xmlStringGetNodeList
3707 *
3708 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3709 * "'" ([^<&'] | Reference)* "'"
3710 *
3711 * 3.3.3 Attribute-Value Normalization:
3712 * Before the value of an attribute is passed to the application or
3713 * checked for validity, the XML processor must normalize it as follows:
3714 * - a character reference is processed by appending the referenced
3715 * character to the attribute value
3716 * - an entity reference is processed by recursively processing the
3717 * replacement text of the entity
3718 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3719 * appending #x20 to the normalized value, except that only a single
3720 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3721 * parsed entity or the literal entity value of an internal parsed entity
3722 * - other characters are processed by appending them to the normalized value
3723 * If the declared value is not CDATA, then the XML processor must further
3724 * process the normalized attribute value by discarding any leading and
3725 * trailing space (#x20) characters, and by replacing sequences of space
3726 * (#x20) characters by a single space (#x20) character.
3727 * All attributes for which no declaration has been read should be treated
3728 * by a non-validating parser as if declared CDATA.
3729 *
3730 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3731 */
3732
3733
3734xmlChar *
3735xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003736 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003737 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003738}
3739
3740/**
Owen Taylor3473f882001-02-23 17:55:21 +00003741 * xmlParseSystemLiteral:
3742 * @ctxt: an XML parser context
3743 *
3744 * parse an XML Literal
3745 *
3746 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3747 *
3748 * Returns the SystemLiteral parsed or NULL
3749 */
3750
3751xmlChar *
3752xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3753 xmlChar *buf = NULL;
3754 int len = 0;
3755 int size = XML_PARSER_BUFFER_SIZE;
3756 int cur, l;
3757 xmlChar stop;
3758 int state = ctxt->instate;
3759 int count = 0;
3760
3761 SHRINK;
3762 if (RAW == '"') {
3763 NEXT;
3764 stop = '"';
3765 } else if (RAW == '\'') {
3766 NEXT;
3767 stop = '\'';
3768 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003769 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003770 return(NULL);
3771 }
3772
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003773 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003774 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003775 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003776 return(NULL);
3777 }
3778 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3779 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003780 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003781 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003782 xmlChar *tmp;
3783
Owen Taylor3473f882001-02-23 17:55:21 +00003784 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003785 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3786 if (tmp == NULL) {
3787 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003788 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003789 ctxt->instate = (xmlParserInputState) state;
3790 return(NULL);
3791 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003792 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 }
3794 count++;
3795 if (count > 50) {
3796 GROW;
3797 count = 0;
3798 }
3799 COPY_BUF(l,buf,len,cur);
3800 NEXTL(l);
3801 cur = CUR_CHAR(l);
3802 if (cur == 0) {
3803 GROW;
3804 SHRINK;
3805 cur = CUR_CHAR(l);
3806 }
3807 }
3808 buf[len] = 0;
3809 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003810 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003811 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003812 } else {
3813 NEXT;
3814 }
3815 return(buf);
3816}
3817
3818/**
3819 * xmlParsePubidLiteral:
3820 * @ctxt: an XML parser context
3821 *
3822 * parse an XML public literal
3823 *
3824 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3825 *
3826 * Returns the PubidLiteral parsed or NULL.
3827 */
3828
3829xmlChar *
3830xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3831 xmlChar *buf = NULL;
3832 int len = 0;
3833 int size = XML_PARSER_BUFFER_SIZE;
3834 xmlChar cur;
3835 xmlChar stop;
3836 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003837 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003838
3839 SHRINK;
3840 if (RAW == '"') {
3841 NEXT;
3842 stop = '"';
3843 } else if (RAW == '\'') {
3844 NEXT;
3845 stop = '\'';
3846 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003847 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003848 return(NULL);
3849 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003850 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003851 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003852 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003853 return(NULL);
3854 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003855 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003856 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003857 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003858 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003859 xmlChar *tmp;
3860
Owen Taylor3473f882001-02-23 17:55:21 +00003861 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003862 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3863 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003864 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003865 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003866 return(NULL);
3867 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003868 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
3870 buf[len++] = cur;
3871 count++;
3872 if (count > 50) {
3873 GROW;
3874 count = 0;
3875 }
3876 NEXT;
3877 cur = CUR;
3878 if (cur == 0) {
3879 GROW;
3880 SHRINK;
3881 cur = CUR;
3882 }
3883 }
3884 buf[len] = 0;
3885 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003886 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003887 } else {
3888 NEXT;
3889 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003890 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003891 return(buf);
3892}
3893
/*
 * Forward declaration; the definition is outside this part of the file.
 * NOTE(review): presumably the general fallback of xmlParseCharData(),
 * taking the same arguments - confirm against the definition.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003895
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value. An entry holds the byte itself for
 * the horizontal tab (0x09) and for 0x20..0x7F, with the exception of
 * '&' (0x26), '<' (0x3C) and ']' (0x5D); every other entry, including
 * LF (0x0A), CR (0x0D) and all bytes >= 0x80, is 0x00.
 * Each row below covers 8 consecutive byte values.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00 - 0x07 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 kept, LF (0xA) and CR (0xD) are zero */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18 - 0x1F */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & (0x26) is zero */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < (0x3C) is zero */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] (0x5D) is zero */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii: 0x80 - 0xFF all zero */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3933
Owen Taylor3473f882001-02-23 17:55:21 +00003934/**
3935 * xmlParseCharData:
3936 * @ctxt: an XML parser context
3937 * @cdata: int indicating whether we are within a CDATA section
3938 *
3939 * parse a CharData section.
3940 * if we are within a CDATA section ']]>' marks an end of section.
3941 *
3942 * The right angle bracket (>) may be represented using the string "&gt;",
3943 * and must, for compatibility, be escaped using "&gt;" or a character
3944 * reference when it appears in the string "]]>" in content, when that
3945 * string is not marking the end of a CDATA section.
3946 *
3947 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3948 */
3949
3950void
3951xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003952 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003953 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003954 int line = ctxt->input->line;
3955 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003956 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003957
3958 SHRINK;
3959 GROW;
3960 /*
3961 * Accelerated common case where input don't need to be
3962 * modified before passing it to the handler.
3963 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003964 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003965 in = ctxt->input->cur;
3966 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003967get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003968 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003969 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003970 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003971 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003972 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003973 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003974 goto get_more_space;
3975 }
3976 if (*in == '<') {
3977 nbchar = in - ctxt->input->cur;
3978 if (nbchar > 0) {
3979 const xmlChar *tmp = ctxt->input->cur;
3980 ctxt->input->cur = in;
3981
Daniel Veillard34099b42004-11-04 17:34:35 +00003982 if ((ctxt->sax != NULL) &&
3983 (ctxt->sax->ignorableWhitespace !=
3984 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003985 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003986 if (ctxt->sax->ignorableWhitespace != NULL)
3987 ctxt->sax->ignorableWhitespace(ctxt->userData,
3988 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003989 } else {
3990 if (ctxt->sax->characters != NULL)
3991 ctxt->sax->characters(ctxt->userData,
3992 tmp, nbchar);
3993 if (*ctxt->space == -1)
3994 *ctxt->space = -2;
3995 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003996 } else if ((ctxt->sax != NULL) &&
3997 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003998 ctxt->sax->characters(ctxt->userData,
3999 tmp, nbchar);
4000 }
4001 }
4002 return;
4003 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004004
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004005get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004006 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004007 while (test_char_data[*in]) {
4008 in++;
4009 ccol++;
4010 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004011 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004012 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004013 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004014 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004015 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004016 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004017 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004018 }
4019 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004020 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004021 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004022 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004023 return;
4024 }
4025 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004026 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004027 goto get_more;
4028 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004029 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004030 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004031 if ((ctxt->sax != NULL) &&
4032 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004033 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004034 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004035 const xmlChar *tmp = ctxt->input->cur;
4036 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004037
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004038 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004039 if (ctxt->sax->ignorableWhitespace != NULL)
4040 ctxt->sax->ignorableWhitespace(ctxt->userData,
4041 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004042 } else {
4043 if (ctxt->sax->characters != NULL)
4044 ctxt->sax->characters(ctxt->userData,
4045 tmp, nbchar);
4046 if (*ctxt->space == -1)
4047 *ctxt->space = -2;
4048 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004049 line = ctxt->input->line;
4050 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004051 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004052 if (ctxt->sax->characters != NULL)
4053 ctxt->sax->characters(ctxt->userData,
4054 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004055 line = ctxt->input->line;
4056 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004057 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004058 }
4059 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004060 if (*in == 0xD) {
4061 in++;
4062 if (*in == 0xA) {
4063 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004064 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004065 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004066 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004067 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004068 in--;
4069 }
4070 if (*in == '<') {
4071 return;
4072 }
4073 if (*in == '&') {
4074 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004075 }
4076 SHRINK;
4077 GROW;
4078 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004079 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004080 nbchar = 0;
4081 }
Daniel Veillard50582112001-03-26 22:52:16 +00004082 ctxt->input->line = line;
4083 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004084 xmlParseCharDataComplex(ctxt, cdata);
4085}
4086
Daniel Veillard01c13b52002-12-10 15:19:08 +00004087/**
4088 * xmlParseCharDataComplex:
4089 * @ctxt: an XML parser context
4090 * @cdata: int indicating whether we are within a CDATA section
4091 *
4092 * parse a CharData section.this is the fallback function
4093 * of xmlParseCharData() when the parsing requires handling
4094 * of non-ASCII characters.
4095 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004096void
4097xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004098 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4099 int nbchar = 0;
4100 int cur, l;
4101 int count = 0;
4102
4103 SHRINK;
4104 GROW;
4105 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004106 while ((cur != '<') && /* checked */
4107 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004108 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004109 if ((cur == ']') && (NXT(1) == ']') &&
4110 (NXT(2) == '>')) {
4111 if (cdata) break;
4112 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004113 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004114 }
4115 }
4116 COPY_BUF(l,buf,nbchar,cur);
4117 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004118 buf[nbchar] = 0;
4119
Owen Taylor3473f882001-02-23 17:55:21 +00004120 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004121 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004122 */
4123 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004124 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004125 if (ctxt->sax->ignorableWhitespace != NULL)
4126 ctxt->sax->ignorableWhitespace(ctxt->userData,
4127 buf, nbchar);
4128 } else {
4129 if (ctxt->sax->characters != NULL)
4130 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004131 if ((ctxt->sax->characters !=
4132 ctxt->sax->ignorableWhitespace) &&
4133 (*ctxt->space == -1))
4134 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004135 }
4136 }
4137 nbchar = 0;
4138 }
4139 count++;
4140 if (count > 50) {
4141 GROW;
4142 count = 0;
4143 }
4144 NEXTL(l);
4145 cur = CUR_CHAR(l);
4146 }
4147 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004148 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004149 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004150 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004151 */
4152 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004153 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004154 if (ctxt->sax->ignorableWhitespace != NULL)
4155 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4156 } else {
4157 if (ctxt->sax->characters != NULL)
4158 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004159 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4160 (*ctxt->space == -1))
4161 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004162 }
4163 }
4164 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004165 if ((cur != 0) && (!IS_CHAR(cur))) {
4166 /* Generate the error and skip the offending character */
4167 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4168 "PCDATA invalid Char value %d\n",
4169 cur);
4170 NEXTL(l);
4171 }
Owen Taylor3473f882001-02-23 17:55:21 +00004172}
4173
4174/**
4175 * xmlParseExternalID:
4176 * @ctxt: an XML parser context
4177 * @publicID: a xmlChar** receiving PubidLiteral
4178 * @strict: indicate whether we should restrict parsing to only
4179 * production [75], see NOTE below
4180 *
4181 * Parse an External ID or a Public ID
4182 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004183 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004184 * 'PUBLIC' S PubidLiteral S SystemLiteral
4185 *
4186 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4187 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4188 *
4189 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4190 *
4191 * Returns the function returns SystemLiteral and in the second
4192 * case publicID receives PubidLiteral, is strict is off
4193 * it is possible to return NULL and have publicID set.
4194 */
4195
4196xmlChar *
4197xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4198 xmlChar *URI = NULL;
4199
4200 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004201
4202 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004203 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004204 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004205 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004206 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4207 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004208 }
4209 SKIP_BLANKS;
4210 URI = xmlParseSystemLiteral(ctxt);
4211 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004212 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004213 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004214 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004215 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004216 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004217 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004218 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004219 }
4220 SKIP_BLANKS;
4221 *publicID = xmlParsePubidLiteral(ctxt);
4222 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004223 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004224 }
4225 if (strict) {
4226 /*
4227 * We don't handle [83] so "S SystemLiteral" is required.
4228 */
William M. Brack76e95df2003-10-18 16:20:14 +00004229 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004230 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004231 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004232 }
4233 } else {
4234 /*
4235 * We handle [83] so we return immediately, if
4236 * "S SystemLiteral" is not detected. From a purely parsing
4237 * point of view that's a nice mess.
4238 */
4239 const xmlChar *ptr;
4240 GROW;
4241
4242 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004243 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004244
William M. Brack76e95df2003-10-18 16:20:14 +00004245 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4247 }
4248 SKIP_BLANKS;
4249 URI = xmlParseSystemLiteral(ctxt);
4250 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004251 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 }
4253 }
4254 return(URI);
4255}
4256
4257/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004258 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004259 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004260 * @buf: the already parsed part of the buffer
4261 * @len: number of bytes filles in the buffer
4262 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004263 *
4264 * Skip an XML (SGML) comment <!-- .... -->
4265 * The spec says that "For compatibility, the string "--" (double-hyphen)
4266 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004267 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004268 *
4269 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4270 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004271static void
4272xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004273 int q, ql;
4274 int r, rl;
4275 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004276 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004277 int inputid;
4278
4279 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004280
Owen Taylor3473f882001-02-23 17:55:21 +00004281 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004282 len = 0;
4283 size = XML_PARSER_BUFFER_SIZE;
4284 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4285 if (buf == NULL) {
4286 xmlErrMemory(ctxt, NULL);
4287 return;
4288 }
Owen Taylor3473f882001-02-23 17:55:21 +00004289 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004290 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004291 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004292 if (q == 0)
4293 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004294 if (!IS_CHAR(q)) {
4295 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4296 "xmlParseComment: invalid xmlChar value %d\n",
4297 q);
4298 xmlFree (buf);
4299 return;
4300 }
Owen Taylor3473f882001-02-23 17:55:21 +00004301 NEXTL(ql);
4302 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004303 if (r == 0)
4304 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004305 if (!IS_CHAR(r)) {
4306 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4307 "xmlParseComment: invalid xmlChar value %d\n",
4308 q);
4309 xmlFree (buf);
4310 return;
4311 }
Owen Taylor3473f882001-02-23 17:55:21 +00004312 NEXTL(rl);
4313 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004314 if (cur == 0)
4315 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004316 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004317 ((cur != '>') ||
4318 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004319 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004320 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004321 }
4322 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004323 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004324 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004325 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4326 if (new_buf == NULL) {
4327 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004328 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004329 return;
4330 }
William M. Bracka3215c72004-07-31 16:24:01 +00004331 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 COPY_BUF(ql,buf,len,q);
4334 q = r;
4335 ql = rl;
4336 r = cur;
4337 rl = l;
4338
4339 count++;
4340 if (count > 50) {
4341 GROW;
4342 count = 0;
4343 }
4344 NEXTL(l);
4345 cur = CUR_CHAR(l);
4346 if (cur == 0) {
4347 SHRINK;
4348 GROW;
4349 cur = CUR_CHAR(l);
4350 }
4351 }
4352 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004353 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004354 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004355 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004356 } else if (!IS_CHAR(cur)) {
4357 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4358 "xmlParseComment: invalid xmlChar value %d\n",
4359 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004360 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004361 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004362 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4363 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 NEXT;
4366 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4367 (!ctxt->disableSAX))
4368 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004369 }
Daniel Veillardda629342007-08-01 07:49:06 +00004370 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004371 return;
4372not_terminated:
4373 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4374 "Comment not terminated\n", NULL);
4375 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004376 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004377}
Daniel Veillardda629342007-08-01 07:49:06 +00004378
Daniel Veillard4c778d82005-01-23 17:37:44 +00004379/**
4380 * xmlParseComment:
4381 * @ctxt: an XML parser context
4382 *
4383 * Skip an XML (SGML) comment <!-- .... -->
4384 * The spec says that "For compatibility, the string "--" (double-hyphen)
4385 * must not occur within comments. "
4386 *
4387 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4388 */
4389void
4390xmlParseComment(xmlParserCtxtPtr ctxt) {
4391 xmlChar *buf = NULL;
4392 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004393 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004394 xmlParserInputState state;
4395 const xmlChar *in;
4396 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004397 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004398
4399 /*
4400 * Check that there is a comment right here.
4401 */
4402 if ((RAW != '<') || (NXT(1) != '!') ||
4403 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004404 state = ctxt->instate;
4405 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004406 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004407 SKIP(4);
4408 SHRINK;
4409 GROW;
4410
4411 /*
4412 * Accelerated common case where input don't need to be
4413 * modified before passing it to the handler.
4414 */
4415 in = ctxt->input->cur;
4416 do {
4417 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004418 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004419 ctxt->input->line++; ctxt->input->col = 1;
4420 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004421 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004422 }
4423get_more:
4424 ccol = ctxt->input->col;
4425 while (((*in > '-') && (*in <= 0x7F)) ||
4426 ((*in >= 0x20) && (*in < '-')) ||
4427 (*in == 0x09)) {
4428 in++;
4429 ccol++;
4430 }
4431 ctxt->input->col = ccol;
4432 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004433 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004434 ctxt->input->line++; ctxt->input->col = 1;
4435 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004436 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004437 goto get_more;
4438 }
4439 nbchar = in - ctxt->input->cur;
4440 /*
4441 * save current set of data
4442 */
4443 if (nbchar > 0) {
4444 if ((ctxt->sax != NULL) &&
4445 (ctxt->sax->comment != NULL)) {
4446 if (buf == NULL) {
4447 if ((*in == '-') && (in[1] == '-'))
4448 size = nbchar + 1;
4449 else
4450 size = XML_PARSER_BUFFER_SIZE + nbchar;
4451 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4452 if (buf == NULL) {
4453 xmlErrMemory(ctxt, NULL);
4454 ctxt->instate = state;
4455 return;
4456 }
4457 len = 0;
4458 } else if (len + nbchar + 1 >= size) {
4459 xmlChar *new_buf;
4460 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4461 new_buf = (xmlChar *) xmlRealloc(buf,
4462 size * sizeof(xmlChar));
4463 if (new_buf == NULL) {
4464 xmlFree (buf);
4465 xmlErrMemory(ctxt, NULL);
4466 ctxt->instate = state;
4467 return;
4468 }
4469 buf = new_buf;
4470 }
4471 memcpy(&buf[len], ctxt->input->cur, nbchar);
4472 len += nbchar;
4473 buf[len] = 0;
4474 }
4475 }
4476 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004477 if (*in == 0xA) {
4478 in++;
4479 ctxt->input->line++; ctxt->input->col = 1;
4480 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004481 if (*in == 0xD) {
4482 in++;
4483 if (*in == 0xA) {
4484 ctxt->input->cur = in;
4485 in++;
4486 ctxt->input->line++; ctxt->input->col = 1;
4487 continue; /* while */
4488 }
4489 in--;
4490 }
4491 SHRINK;
4492 GROW;
4493 in = ctxt->input->cur;
4494 if (*in == '-') {
4495 if (in[1] == '-') {
4496 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004497 if (ctxt->input->id != inputid) {
4498 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4499 "comment doesn't start and stop in the same entity\n");
4500 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004501 SKIP(3);
4502 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4503 (!ctxt->disableSAX)) {
4504 if (buf != NULL)
4505 ctxt->sax->comment(ctxt->userData, buf);
4506 else
4507 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4508 }
4509 if (buf != NULL)
4510 xmlFree(buf);
4511 ctxt->instate = state;
4512 return;
4513 }
4514 if (buf != NULL)
4515 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4516 "Comment not terminated \n<!--%.50s\n",
4517 buf);
4518 else
4519 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4520 "Comment not terminated \n", NULL);
4521 in++;
4522 ctxt->input->col++;
4523 }
4524 in++;
4525 ctxt->input->col++;
4526 goto get_more;
4527 }
4528 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4529 xmlParseCommentComplex(ctxt, buf, len, size);
4530 ctxt->instate = state;
4531 return;
4532}
4533
Owen Taylor3473f882001-02-23 17:55:21 +00004534
4535/**
4536 * xmlParsePITarget:
4537 * @ctxt: an XML parser context
4538 *
4539 * parse the name of a PI
4540 *
4541 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4542 *
4543 * Returns the PITarget name or NULL
4544 */
4545
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004546const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004547xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004548 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004549
4550 name = xmlParseName(ctxt);
4551 if ((name != NULL) &&
4552 ((name[0] == 'x') || (name[0] == 'X')) &&
4553 ((name[1] == 'm') || (name[1] == 'M')) &&
4554 ((name[2] == 'l') || (name[2] == 'L'))) {
4555 int i;
4556 if ((name[0] == 'x') && (name[1] == 'm') &&
4557 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004558 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004559 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004560 return(name);
4561 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004562 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004563 return(name);
4564 }
4565 for (i = 0;;i++) {
4566 if (xmlW3CPIs[i] == NULL) break;
4567 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4568 return(name);
4569 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004570 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4571 "xmlParsePITarget: invalid name prefix 'xml'\n",
4572 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004573 }
Daniel Veillard37334572008-07-31 08:20:02 +00004574 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4575 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4576 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4577 }
Owen Taylor3473f882001-02-23 17:55:21 +00004578 return(name);
4579}
4580
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The catalog named by the PI is added to the catalogs of the parsing
 * context, to be used if there is a resolution need further down in the
 * document.  A value that does not have the expected catalog="..." shape
 * raises a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* expected shape: S? 'catalog' S? '=' S? quoted-string S? */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;
    while (IS_BLANK_CH(*cursor))
        cursor++;
    /*
     * NOTE(review): a missing '=' is ignored silently while every other
     * syntax problem raises the warning below - confirm this is intended.
     */
    if (*cursor != '=')
        return;
    cursor++;
    while (IS_BLANK_CH(*cursor))
        cursor++;

    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;
    start = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto error;
    uri = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4642
Owen Taylor3473f882001-02-23 17:55:21 +00004643/**
4644 * xmlParsePI:
4645 * @ctxt: an XML parser context
4646 *
4647 * parse an XML Processing Instruction.
4648 *
4649 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4650 *
4651 * The processing is transfered to SAX once parsed.
4652 */
4653
4654void
4655xmlParsePI(xmlParserCtxtPtr ctxt) {
4656 xmlChar *buf = NULL;
4657 int len = 0;
4658 int size = XML_PARSER_BUFFER_SIZE;
4659 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004660 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 xmlParserInputState state;
4662 int count = 0;
4663
4664 if ((RAW == '<') && (NXT(1) == '?')) {
4665 xmlParserInputPtr input = ctxt->input;
4666 state = ctxt->instate;
4667 ctxt->instate = XML_PARSER_PI;
4668 /*
4669 * this is a Processing Instruction.
4670 */
4671 SKIP(2);
4672 SHRINK;
4673
4674 /*
4675 * Parse the target name and check for special support like
4676 * namespace.
4677 */
4678 target = xmlParsePITarget(ctxt);
4679 if (target != NULL) {
4680 if ((RAW == '?') && (NXT(1) == '>')) {
4681 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004682 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4683 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
4685 SKIP(2);
4686
4687 /*
4688 * SAX: PI detected.
4689 */
4690 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4691 (ctxt->sax->processingInstruction != NULL))
4692 ctxt->sax->processingInstruction(ctxt->userData,
4693 target, NULL);
4694 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004695 return;
4696 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004697 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004698 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004699 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004700 ctxt->instate = state;
4701 return;
4702 }
4703 cur = CUR;
4704 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004705 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4706 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 }
4708 SKIP_BLANKS;
4709 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004710 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004711 ((cur != '?') || (NXT(1) != '>'))) {
4712 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004713 xmlChar *tmp;
4714
Owen Taylor3473f882001-02-23 17:55:21 +00004715 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004716 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4717 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004718 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004719 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004720 ctxt->instate = state;
4721 return;
4722 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004723 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004724 }
4725 count++;
4726 if (count > 50) {
4727 GROW;
4728 count = 0;
4729 }
4730 COPY_BUF(l,buf,len,cur);
4731 NEXTL(l);
4732 cur = CUR_CHAR(l);
4733 if (cur == 0) {
4734 SHRINK;
4735 GROW;
4736 cur = CUR_CHAR(l);
4737 }
4738 }
4739 buf[len] = 0;
4740 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004741 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4742 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 } else {
4744 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004745 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4746 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004747 }
4748 SKIP(2);
4749
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004750#ifdef LIBXML_CATALOG_ENABLED
4751 if (((state == XML_PARSER_MISC) ||
4752 (state == XML_PARSER_START)) &&
4753 (xmlStrEqual(target, XML_CATALOG_PI))) {
4754 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4755 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4756 (allow == XML_CATA_ALLOW_ALL))
4757 xmlParseCatalogPI(ctxt, buf);
4758 }
4759#endif
4760
4761
Owen Taylor3473f882001-02-23 17:55:21 +00004762 /*
4763 * SAX: PI detected.
4764 */
4765 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4766 (ctxt->sax->processingInstruction != NULL))
4767 ctxt->sax->processingInstruction(ctxt->userData,
4768 target, buf);
4769 }
4770 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004771 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004772 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004773 }
4774 ctxt->instate = state;
4775 }
4776}
4777
4778/**
4779 * xmlParseNotationDecl:
4780 * @ctxt: an XML parser context
4781 *
4782 * parse a notation declaration
4783 *
4784 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4785 *
4786 * Hence there is actually 3 choices:
4787 * 'PUBLIC' S PubidLiteral
4788 * 'PUBLIC' S PubidLiteral S SystemLiteral
4789 * and 'SYSTEM' S SystemLiteral
4790 *
4791 * See the NOTE on xmlParseExternalID().
4792 */
4793
4794void
4795xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004796 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004797 xmlChar *Pubid;
4798 xmlChar *Systemid;
4799
Daniel Veillarda07050d2003-10-19 14:46:32 +00004800 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004801 xmlParserInputPtr input = ctxt->input;
4802 SHRINK;
4803 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004804 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004805 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4806 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004807 return;
4808 }
4809 SKIP_BLANKS;
4810
Daniel Veillard76d66f42001-05-16 21:05:17 +00004811 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004812 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004813 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004814 return;
4815 }
William M. Brack76e95df2003-10-18 16:20:14 +00004816 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004817 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004818 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004819 return;
4820 }
Daniel Veillard37334572008-07-31 08:20:02 +00004821 if (xmlStrchr(name, ':') != NULL) {
4822 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4823 "colon are forbidden from notation names '%s'\n",
4824 name, NULL, NULL);
4825 }
Owen Taylor3473f882001-02-23 17:55:21 +00004826 SKIP_BLANKS;
4827
4828 /*
4829 * Parse the IDs.
4830 */
4831 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4832 SKIP_BLANKS;
4833
4834 if (RAW == '>') {
4835 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004836 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4837 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004838 }
4839 NEXT;
4840 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4841 (ctxt->sax->notationDecl != NULL))
4842 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4843 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004844 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004845 }
Owen Taylor3473f882001-02-23 17:55:21 +00004846 if (Systemid != NULL) xmlFree(Systemid);
4847 if (Pubid != NULL) xmlFree(Pubid);
4848 }
4849}
4850
4851/**
4852 * xmlParseEntityDecl:
4853 * @ctxt: an XML parser context
4854 *
4855 * parse <!ENTITY declarations
4856 *
4857 * [70] EntityDecl ::= GEDecl | PEDecl
4858 *
4859 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4860 *
4861 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4862 *
4863 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4864 *
4865 * [74] PEDef ::= EntityValue | ExternalID
4866 *
4867 * [76] NDataDecl ::= S 'NDATA' S Name
4868 *
4869 * [ VC: Notation Declared ]
4870 * The Name must match the declared name of a notation.
4871 */
4872
4873void
4874xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004875 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004876 xmlChar *value = NULL;
4877 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004878 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004879 int isParameter = 0;
4880 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004881 int skipped;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00004882 unsigned long oldnbent = ctxt->nbentities;
Owen Taylor3473f882001-02-23 17:55:21 +00004883
Daniel Veillard4c778d82005-01-23 17:37:44 +00004884 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004885 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004886 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004887 SHRINK;
4888 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004889 skipped = SKIP_BLANKS;
4890 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004891 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4892 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004893 }
Owen Taylor3473f882001-02-23 17:55:21 +00004894
4895 if (RAW == '%') {
4896 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004897 skipped = SKIP_BLANKS;
4898 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4900 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004901 }
Owen Taylor3473f882001-02-23 17:55:21 +00004902 isParameter = 1;
4903 }
4904
Daniel Veillard76d66f42001-05-16 21:05:17 +00004905 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004906 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004907 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4908 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004909 return;
4910 }
Daniel Veillard37334572008-07-31 08:20:02 +00004911 if (xmlStrchr(name, ':') != NULL) {
4912 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4913 "colon are forbidden from entities names '%s'\n",
4914 name, NULL, NULL);
4915 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004916 skipped = SKIP_BLANKS;
4917 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004918 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4919 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004920 }
Owen Taylor3473f882001-02-23 17:55:21 +00004921
Daniel Veillardf5582f12002-06-11 10:08:16 +00004922 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 /*
4924 * handle the various case of definitions...
4925 */
4926 if (isParameter) {
4927 if ((RAW == '"') || (RAW == '\'')) {
4928 value = xmlParseEntityValue(ctxt, &orig);
4929 if (value) {
4930 if ((ctxt->sax != NULL) &&
4931 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4932 ctxt->sax->entityDecl(ctxt->userData, name,
4933 XML_INTERNAL_PARAMETER_ENTITY,
4934 NULL, NULL, value);
4935 }
4936 } else {
4937 URI = xmlParseExternalID(ctxt, &literal, 1);
4938 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004939 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004940 }
4941 if (URI) {
4942 xmlURIPtr uri;
4943
4944 uri = xmlParseURI((const char *) URI);
4945 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004946 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4947 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004948 /*
4949 * This really ought to be a well formedness error
4950 * but the XML Core WG decided otherwise c.f. issue
4951 * E26 of the XML erratas.
4952 */
Owen Taylor3473f882001-02-23 17:55:21 +00004953 } else {
4954 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004955 /*
4956 * Okay this is foolish to block those but not
4957 * invalid URIs.
4958 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004959 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004960 } else {
4961 if ((ctxt->sax != NULL) &&
4962 (!ctxt->disableSAX) &&
4963 (ctxt->sax->entityDecl != NULL))
4964 ctxt->sax->entityDecl(ctxt->userData, name,
4965 XML_EXTERNAL_PARAMETER_ENTITY,
4966 literal, URI, NULL);
4967 }
4968 xmlFreeURI(uri);
4969 }
4970 }
4971 }
4972 } else {
4973 if ((RAW == '"') || (RAW == '\'')) {
4974 value = xmlParseEntityValue(ctxt, &orig);
4975 if ((ctxt->sax != NULL) &&
4976 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4977 ctxt->sax->entityDecl(ctxt->userData, name,
4978 XML_INTERNAL_GENERAL_ENTITY,
4979 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004980 /*
4981 * For expat compatibility in SAX mode.
4982 */
4983 if ((ctxt->myDoc == NULL) ||
4984 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4985 if (ctxt->myDoc == NULL) {
4986 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004987 if (ctxt->myDoc == NULL) {
4988 xmlErrMemory(ctxt, "New Doc failed");
4989 return;
4990 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00004991 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00004992 }
4993 if (ctxt->myDoc->intSubset == NULL)
4994 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4995 BAD_CAST "fake", NULL, NULL);
4996
Daniel Veillard1af9a412003-08-20 22:54:39 +00004997 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4998 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004999 }
Owen Taylor3473f882001-02-23 17:55:21 +00005000 } else {
5001 URI = xmlParseExternalID(ctxt, &literal, 1);
5002 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005003 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005004 }
5005 if (URI) {
5006 xmlURIPtr uri;
5007
5008 uri = xmlParseURI((const char *)URI);
5009 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005010 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5011 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005012 /*
5013 * This really ought to be a well formedness error
5014 * but the XML Core WG decided otherwise c.f. issue
5015 * E26 of the XML erratas.
5016 */
Owen Taylor3473f882001-02-23 17:55:21 +00005017 } else {
5018 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005019 /*
5020 * Okay this is foolish to block those but not
5021 * invalid URIs.
5022 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005023 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 }
5025 xmlFreeURI(uri);
5026 }
5027 }
William M. Brack76e95df2003-10-18 16:20:14 +00005028 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5030 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005031 }
5032 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005033 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005034 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005035 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5037 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005038 }
5039 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005040 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005041 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5042 (ctxt->sax->unparsedEntityDecl != NULL))
5043 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5044 literal, URI, ndata);
5045 } else {
5046 if ((ctxt->sax != NULL) &&
5047 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5048 ctxt->sax->entityDecl(ctxt->userData, name,
5049 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5050 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005051 /*
5052 * For expat compatibility in SAX mode.
5053 * assuming the entity repalcement was asked for
5054 */
5055 if ((ctxt->replaceEntities != 0) &&
5056 ((ctxt->myDoc == NULL) ||
5057 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5058 if (ctxt->myDoc == NULL) {
5059 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005060 if (ctxt->myDoc == NULL) {
5061 xmlErrMemory(ctxt, "New Doc failed");
5062 return;
5063 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005064 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005065 }
5066
5067 if (ctxt->myDoc->intSubset == NULL)
5068 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5069 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005070 xmlSAX2EntityDecl(ctxt, name,
5071 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5072 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005073 }
Owen Taylor3473f882001-02-23 17:55:21 +00005074 }
5075 }
5076 }
5077 SKIP_BLANKS;
5078 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005079 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005080 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005081 } else {
5082 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005083 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5084 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005085 }
5086 NEXT;
5087 }
5088 if (orig != NULL) {
5089 /*
5090 * Ugly mechanism to save the raw entity value.
5091 */
5092 xmlEntityPtr cur = NULL;
5093
5094 if (isParameter) {
5095 if ((ctxt->sax != NULL) &&
5096 (ctxt->sax->getParameterEntity != NULL))
5097 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5098 } else {
5099 if ((ctxt->sax != NULL) &&
5100 (ctxt->sax->getEntity != NULL))
5101 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005102 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005103 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005104 }
Owen Taylor3473f882001-02-23 17:55:21 +00005105 }
5106 if (cur != NULL) {
Daniel Veillardf4f4e482008-08-25 08:57:48 +00005107 cur->checked = ctxt->nbentities - oldnbent;
Owen Taylor3473f882001-02-23 17:55:21 +00005108 if (cur->orig != NULL)
5109 xmlFree(orig);
5110 else
5111 cur->orig = orig;
5112 } else
5113 xmlFree(orig);
5114 }
Owen Taylor3473f882001-02-23 17:55:21 +00005115 if (value != NULL) xmlFree(value);
5116 if (URI != NULL) xmlFree(URI);
5117 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005118 }
5119}
5120
5121/**
5122 * xmlParseDefaultDecl:
5123 * @ctxt: an XML parser context
5124 * @value: Receive a possible fixed default value for the attribute
5125 *
5126 * Parse an attribute default declaration
5127 *
5128 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5129 *
5130 * [ VC: Required Attribute ]
5131 * if the default declaration is the keyword #REQUIRED, then the
5132 * attribute must be specified for all elements of the type in the
5133 * attribute-list declaration.
5134 *
5135 * [ VC: Attribute Default Legal ]
5136 * The declared default value must meet the lexical constraints of
5137 * the declared attribute type c.f. xmlValidateAttributeDecl()
5138 *
5139 * [ VC: Fixed Attribute Default ]
5140 * if an attribute has a default value declared with the #FIXED
5141 * keyword, instances of that attribute must match the default value.
5142 *
5143 * [ WFC: No < in Attribute Values ]
5144 * handled in xmlParseAttValue()
5145 *
5146 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5147 * or XML_ATTRIBUTE_FIXED.
5148 */
5149
5150int
5151xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5152 int val;
5153 xmlChar *ret;
5154
5155 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005156 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005157 SKIP(9);
5158 return(XML_ATTRIBUTE_REQUIRED);
5159 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005160 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005161 SKIP(8);
5162 return(XML_ATTRIBUTE_IMPLIED);
5163 }
5164 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005165 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005166 SKIP(6);
5167 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005168 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005169 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5170 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005171 }
5172 SKIP_BLANKS;
5173 }
5174 ret = xmlParseAttValue(ctxt);
5175 ctxt->instate = XML_PARSER_DTD;
5176 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005177 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005178 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005179 } else
5180 *value = ret;
5181 return(val);
5182}
5183
5184/**
5185 * xmlParseNotationType:
5186 * @ctxt: an XML parser context
5187 *
5188 * parse an Notation attribute type.
5189 *
5190 * Note: the leading 'NOTATION' S part has already being parsed...
5191 *
5192 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5193 *
5194 * [ VC: Notation Attributes ]
5195 * Values of this type must match one of the notation names included
5196 * in the declaration; all notation names in the declaration must be declared.
5197 *
5198 * Returns: the notation attribute tree built while parsing
5199 */
5200
5201xmlEnumerationPtr
5202xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005203 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5205
5206 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005207 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005208 return(NULL);
5209 }
5210 SHRINK;
5211 do {
5212 NEXT;
5213 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005214 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005215 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005216 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5217 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005218 return(ret);
5219 }
5220 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 if (cur == NULL) return(ret);
5222 if (last == NULL) ret = last = cur;
5223 else {
5224 last->next = cur;
5225 last = cur;
5226 }
5227 SKIP_BLANKS;
5228 } while (RAW == '|');
5229 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005230 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 if ((last != NULL) && (last != ret))
5232 xmlFreeEnumeration(last);
5233 return(ret);
5234 }
5235 NEXT;
5236 return(ret);
5237}
5238
5239/**
5240 * xmlParseEnumerationType:
5241 * @ctxt: an XML parser context
5242 *
5243 * parse an Enumeration attribute type.
5244 *
5245 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5246 *
5247 * [ VC: Enumeration ]
5248 * Values of this type must match one of the Nmtoken tokens in
5249 * the declaration
5250 *
5251 * Returns: the enumeration attribute tree built while parsing
5252 */
5253
5254xmlEnumerationPtr
5255xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5256 xmlChar *name;
5257 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5258
5259 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005260 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005261 return(NULL);
5262 }
5263 SHRINK;
5264 do {
5265 NEXT;
5266 SKIP_BLANKS;
5267 name = xmlParseNmtoken(ctxt);
5268 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005269 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return(ret);
5271 }
5272 cur = xmlCreateEnumeration(name);
5273 xmlFree(name);
5274 if (cur == NULL) return(ret);
5275 if (last == NULL) ret = last = cur;
5276 else {
5277 last->next = cur;
5278 last = cur;
5279 }
5280 SKIP_BLANKS;
5281 } while (RAW == '|');
5282 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005283 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 return(ret);
5285 }
5286 NEXT;
5287 return(ret);
5288}
5289
5290/**
5291 * xmlParseEnumeratedType:
5292 * @ctxt: an XML parser context
5293 * @tree: the enumeration tree built while parsing
5294 *
5295 * parse an Enumerated attribute type.
5296 *
5297 * [57] EnumeratedType ::= NotationType | Enumeration
5298 *
5299 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5300 *
5301 *
5302 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5303 */
5304
5305int
5306xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005307 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005308 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005309 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5311 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005312 return(0);
5313 }
5314 SKIP_BLANKS;
5315 *tree = xmlParseNotationType(ctxt);
5316 if (*tree == NULL) return(0);
5317 return(XML_ATTRIBUTE_NOTATION);
5318 }
5319 *tree = xmlParseEnumerationType(ctxt);
5320 if (*tree == NULL) return(0);
5321 return(XML_ATTRIBUTE_ENUMERATION);
5322}
5323
5324/**
5325 * xmlParseAttributeType:
5326 * @ctxt: an XML parser context
5327 * @tree: the enumeration tree built while parsing
5328 *
5329 * parse the Attribute list def for an element
5330 *
5331 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5332 *
5333 * [55] StringType ::= 'CDATA'
5334 *
5335 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5336 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5337 *
5338 * Validity constraints for attribute values syntax are checked in
5339 * xmlValidateAttributeValue()
5340 *
5341 * [ VC: ID ]
5342 * Values of type ID must match the Name production. A name must not
5343 * appear more than once in an XML document as a value of this type;
5344 * i.e., ID values must uniquely identify the elements which bear them.
5345 *
5346 * [ VC: One ID per Element Type ]
5347 * No element type may have more than one ID attribute specified.
5348 *
5349 * [ VC: ID Attribute Default ]
5350 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5351 *
5352 * [ VC: IDREF ]
5353 * Values of type IDREF must match the Name production, and values
5354 * of type IDREFS must match Names; each IDREF Name must match the value
5355 * of an ID attribute on some element in the XML document; i.e. IDREF
5356 * values must match the value of some ID attribute.
5357 *
5358 * [ VC: Entity Name ]
5359 * Values of type ENTITY must match the Name production, values
5360 * of type ENTITIES must match Names; each Entity Name must match the
5361 * name of an unparsed entity declared in the DTD.
5362 *
5363 * [ VC: Name Token ]
5364 * Values of type NMTOKEN must match the Nmtoken production; values
5365 * of type NMTOKENS must match Nmtokens.
5366 *
5367 * Returns the attribute type
5368 */
5369int
5370xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5371 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005372 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005373 SKIP(5);
5374 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005375 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005376 SKIP(6);
5377 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005378 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005379 SKIP(5);
5380 return(XML_ATTRIBUTE_IDREF);
5381 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5382 SKIP(2);
5383 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005384 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005385 SKIP(6);
5386 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005387 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005388 SKIP(8);
5389 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005390 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005391 SKIP(8);
5392 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005393 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005394 SKIP(7);
5395 return(XML_ATTRIBUTE_NMTOKEN);
5396 }
5397 return(xmlParseEnumeratedType(ctxt, tree));
5398}
5399
5400/**
5401 * xmlParseAttributeListDecl:
5402 * @ctxt: an XML parser context
5403 *
5404 * : parse the Attribute list def for an element
5405 *
5406 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5407 *
5408 * [53] AttDef ::= S Name S AttType S DefaultDecl
5409 *
5410 */
5411void
5412xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005413 const xmlChar *elemName;
5414 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005415 xmlEnumerationPtr tree;
5416
Daniel Veillarda07050d2003-10-19 14:46:32 +00005417 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005418 xmlParserInputPtr input = ctxt->input;
5419
5420 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005421 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005423 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005424 }
5425 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005426 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005428 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5429 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005430 return;
5431 }
5432 SKIP_BLANKS;
5433 GROW;
5434 while (RAW != '>') {
5435 const xmlChar *check = CUR_PTR;
5436 int type;
5437 int def;
5438 xmlChar *defaultValue = NULL;
5439
5440 GROW;
5441 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005442 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005443 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5445 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005446 break;
5447 }
5448 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005449 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005451 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005452 break;
5453 }
5454 SKIP_BLANKS;
5455
5456 type = xmlParseAttributeType(ctxt, &tree);
5457 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005458 break;
5459 }
5460
5461 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005462 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005463 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5464 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005465 if (tree != NULL)
5466 xmlFreeEnumeration(tree);
5467 break;
5468 }
5469 SKIP_BLANKS;
5470
5471 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5472 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005473 if (defaultValue != NULL)
5474 xmlFree(defaultValue);
5475 if (tree != NULL)
5476 xmlFreeEnumeration(tree);
5477 break;
5478 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005479 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5480 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005481
5482 GROW;
5483 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005484 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005485 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005486 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005487 if (defaultValue != NULL)
5488 xmlFree(defaultValue);
5489 if (tree != NULL)
5490 xmlFreeEnumeration(tree);
5491 break;
5492 }
5493 SKIP_BLANKS;
5494 }
5495 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005496 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5497 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (defaultValue != NULL)
5499 xmlFree(defaultValue);
5500 if (tree != NULL)
5501 xmlFreeEnumeration(tree);
5502 break;
5503 }
5504 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5505 (ctxt->sax->attributeDecl != NULL))
5506 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5507 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005508 else if (tree != NULL)
5509 xmlFreeEnumeration(tree);
5510
5511 if ((ctxt->sax2) && (defaultValue != NULL) &&
5512 (def != XML_ATTRIBUTE_IMPLIED) &&
5513 (def != XML_ATTRIBUTE_REQUIRED)) {
5514 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5515 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005516 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005517 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5518 }
Owen Taylor3473f882001-02-23 17:55:21 +00005519 if (defaultValue != NULL)
5520 xmlFree(defaultValue);
5521 GROW;
5522 }
5523 if (RAW == '>') {
5524 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005525 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5526 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005527 }
5528 NEXT;
5529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 }
5531}
5532
5533/**
5534 * xmlParseElementMixedContentDecl:
5535 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005536 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005537 *
5538 * parse the declaration for a Mixed Element content
5539 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5540 *
5541 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5542 * '(' S? '#PCDATA' S? ')'
5543 *
5544 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5545 *
5546 * [ VC: No Duplicate Types ]
5547 * The same name must not appear more than once in a single
5548 * mixed-content declaration.
5549 *
5550 * returns: the list of the xmlElementContentPtr describing the element choices
5551 */
5552xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005553xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005554 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005555 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005556
5557 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005558 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005559 SKIP(7);
5560 SKIP_BLANKS;
5561 SHRINK;
5562 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005563 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005564 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5565"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005566 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005567 }
Owen Taylor3473f882001-02-23 17:55:21 +00005568 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005569 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005570 if (ret == NULL)
5571 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005572 if (RAW == '*') {
5573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5574 NEXT;
5575 }
5576 return(ret);
5577 }
5578 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005579 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 if (ret == NULL) return(NULL);
5581 }
5582 while (RAW == '|') {
5583 NEXT;
5584 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005585 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005586 if (ret == NULL) return(NULL);
5587 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005588 if (cur != NULL)
5589 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005590 cur = ret;
5591 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005592 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005593 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005594 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005595 if (n->c1 != NULL)
5596 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005597 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005598 if (n != NULL)
5599 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005600 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005601 }
5602 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005603 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005604 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005605 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005606 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005607 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005608 return(NULL);
5609 }
5610 SKIP_BLANKS;
5611 GROW;
5612 }
5613 if ((RAW == ')') && (NXT(1) == '*')) {
5614 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005615 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005616 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005617 if (cur->c2 != NULL)
5618 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005619 }
5620 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005621 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005622 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5623"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005624 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005625 }
Owen Taylor3473f882001-02-23 17:55:21 +00005626 SKIP(2);
5627 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005628 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005629 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005630 return(NULL);
5631 }
5632
5633 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005634 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005635 }
5636 return(ret);
5637}
5638
5639/**
5640 * xmlParseElementChildrenContentDecl:
5641 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005642 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005643 *
5644 * parse the declaration for a Mixed Element content
5645 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5646 *
5647 *
5648 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5649 *
5650 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5651 *
5652 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5653 *
5654 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5655 *
5656 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5657 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005658 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005659 * opening or closing parentheses in a choice, seq, or Mixed
5660 * construct is contained in the replacement text for a parameter
5661 * entity, both must be contained in the same replacement text. For
5662 * interoperability, if a parameter-entity reference appears in a
5663 * choice, seq, or Mixed construct, its replacement text should not
5664 * be empty, and neither the first nor last non-blank character of
5665 * the replacement text should be a connector (| or ,).
5666 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005667 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005668 * hierarchy.
5669 */
5670xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005671xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005672 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005673 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005674 xmlChar type = 0;
5675
5676 SKIP_BLANKS;
5677 GROW;
5678 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005679 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005680
Owen Taylor3473f882001-02-23 17:55:21 +00005681 /* Recurse on first child */
5682 NEXT;
5683 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005684 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005685 SKIP_BLANKS;
5686 GROW;
5687 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005688 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005689 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005690 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005691 return(NULL);
5692 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005693 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005694 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005695 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005696 return(NULL);
5697 }
Owen Taylor3473f882001-02-23 17:55:21 +00005698 GROW;
5699 if (RAW == '?') {
5700 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5701 NEXT;
5702 } else if (RAW == '*') {
5703 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5704 NEXT;
5705 } else if (RAW == '+') {
5706 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5707 NEXT;
5708 } else {
5709 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5710 }
Owen Taylor3473f882001-02-23 17:55:21 +00005711 GROW;
5712 }
5713 SKIP_BLANKS;
5714 SHRINK;
5715 while (RAW != ')') {
5716 /*
5717 * Each loop we parse one separator and one element.
5718 */
5719 if (RAW == ',') {
5720 if (type == 0) type = CUR;
5721
5722 /*
5723 * Detect "Name | Name , Name" error
5724 */
5725 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005726 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005727 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005728 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005729 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005730 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005731 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005732 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005733 return(NULL);
5734 }
5735 NEXT;
5736
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005737 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005738 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005739 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005740 xmlFreeDocElementContent(ctxt->myDoc, last);
5741 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005742 return(NULL);
5743 }
5744 if (last == NULL) {
5745 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005746 if (ret != NULL)
5747 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005748 ret = cur = op;
5749 } else {
5750 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005751 if (op != NULL)
5752 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005753 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005754 if (last != NULL)
5755 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005756 cur =op;
5757 last = NULL;
5758 }
5759 } else if (RAW == '|') {
5760 if (type == 0) type = CUR;
5761
5762 /*
5763 * Detect "Name , Name | Name" error
5764 */
5765 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005766 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005767 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005768 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005769 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005770 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005771 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005772 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005773 return(NULL);
5774 }
5775 NEXT;
5776
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005777 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005778 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005779 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005780 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005781 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005782 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 return(NULL);
5784 }
5785 if (last == NULL) {
5786 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005787 if (ret != NULL)
5788 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005789 ret = cur = op;
5790 } else {
5791 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005792 if (op != NULL)
5793 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005794 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005795 if (last != NULL)
5796 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005797 cur =op;
5798 last = NULL;
5799 }
5800 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005801 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005802 if ((last != NULL) && (last != ret))
5803 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005804 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005805 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005806 return(NULL);
5807 }
5808 GROW;
5809 SKIP_BLANKS;
5810 GROW;
5811 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005812 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005813 /* Recurse on second child */
5814 NEXT;
5815 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005816 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005817 SKIP_BLANKS;
5818 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005819 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005820 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005821 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005822 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005823 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005824 return(NULL);
5825 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005826 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005827 if (last == NULL) {
5828 if (ret != NULL)
5829 xmlFreeDocElementContent(ctxt->myDoc, ret);
5830 return(NULL);
5831 }
Owen Taylor3473f882001-02-23 17:55:21 +00005832 if (RAW == '?') {
5833 last->ocur = XML_ELEMENT_CONTENT_OPT;
5834 NEXT;
5835 } else if (RAW == '*') {
5836 last->ocur = XML_ELEMENT_CONTENT_MULT;
5837 NEXT;
5838 } else if (RAW == '+') {
5839 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5840 NEXT;
5841 } else {
5842 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5843 }
5844 }
5845 SKIP_BLANKS;
5846 GROW;
5847 }
5848 if ((cur != NULL) && (last != NULL)) {
5849 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005850 if (last != NULL)
5851 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005852 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005853 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005854 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5855"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005856 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005857 }
Owen Taylor3473f882001-02-23 17:55:21 +00005858 NEXT;
5859 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005860 if (ret != NULL) {
5861 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5862 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5863 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5864 else
5865 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5866 }
Owen Taylor3473f882001-02-23 17:55:21 +00005867 NEXT;
5868 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005869 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005870 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005871 cur = ret;
5872 /*
5873 * Some normalization:
5874 * (a | b* | c?)* == (a | b | c)*
5875 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005876 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005877 if ((cur->c1 != NULL) &&
5878 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5879 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5880 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5881 if ((cur->c2 != NULL) &&
5882 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5883 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5884 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5885 cur = cur->c2;
5886 }
5887 }
Owen Taylor3473f882001-02-23 17:55:21 +00005888 NEXT;
5889 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005890 if (ret != NULL) {
5891 int found = 0;
5892
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005893 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5894 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5895 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005896 else
5897 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005898 /*
5899 * Some normalization:
5900 * (a | b*)+ == (a | b)*
5901 * (a | b?)+ == (a | b)*
5902 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005903 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005904 if ((cur->c1 != NULL) &&
5905 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5906 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5907 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5908 found = 1;
5909 }
5910 if ((cur->c2 != NULL) &&
5911 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5912 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5913 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5914 found = 1;
5915 }
5916 cur = cur->c2;
5917 }
5918 if (found)
5919 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5920 }
Owen Taylor3473f882001-02-23 17:55:21 +00005921 NEXT;
5922 }
5923 return(ret);
5924}
5925
5926/**
5927 * xmlParseElementContentDecl:
5928 * @ctxt: an XML parser context
5929 * @name: the name of the element being defined.
5930 * @result: the Element Content pointer will be stored here if any
5931 *
5932 * parse the declaration for an Element content either Mixed or Children,
5933 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5934 *
5935 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5936 *
5937 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5938 */
5939
5940int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005941xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005942 xmlElementContentPtr *result) {
5943
5944 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005945 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005946 int res;
5947
5948 *result = NULL;
5949
5950 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005951 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005952 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005953 return(-1);
5954 }
5955 NEXT;
5956 GROW;
5957 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005958 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005959 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 res = XML_ELEMENT_TYPE_MIXED;
5961 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005962 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005963 res = XML_ELEMENT_TYPE_ELEMENT;
5964 }
Owen Taylor3473f882001-02-23 17:55:21 +00005965 SKIP_BLANKS;
5966 *result = tree;
5967 return(res);
5968}
5969
5970/**
5971 * xmlParseElementDecl:
5972 * @ctxt: an XML parser context
5973 *
5974 * parse an Element declaration.
5975 *
5976 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5977 *
5978 * [ VC: Unique Element Type Declaration ]
5979 * No element type may be declared more than once
5980 *
5981 * Returns the type of the element, or -1 in case of error
5982 */
5983int
5984xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005985 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005986 int ret = -1;
5987 xmlElementContentPtr content = NULL;
5988
Daniel Veillard4c778d82005-01-23 17:37:44 +00005989 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005990 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005991 xmlParserInputPtr input = ctxt->input;
5992
5993 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005994 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005995 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5996 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005997 }
5998 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005999 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006000 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006001 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6002 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006003 return(-1);
6004 }
6005 while ((RAW == 0) && (ctxt->inputNr > 1))
6006 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006007 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006010 }
6011 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006012 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006013 SKIP(5);
6014 /*
6015 * Element must always be empty.
6016 */
6017 ret = XML_ELEMENT_TYPE_EMPTY;
6018 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6019 (NXT(2) == 'Y')) {
6020 SKIP(3);
6021 /*
6022 * Element is a generic container.
6023 */
6024 ret = XML_ELEMENT_TYPE_ANY;
6025 } else if (RAW == '(') {
6026 ret = xmlParseElementContentDecl(ctxt, name, &content);
6027 } else {
6028 /*
6029 * [ WFC: PEs in Internal Subset ] error handling.
6030 */
6031 if ((RAW == '%') && (ctxt->external == 0) &&
6032 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006033 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006034 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006035 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006036 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006037 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6038 }
Owen Taylor3473f882001-02-23 17:55:21 +00006039 return(-1);
6040 }
6041
6042 SKIP_BLANKS;
6043 /*
6044 * Pop-up of finished entities.
6045 */
6046 while ((RAW == 0) && (ctxt->inputNr > 1))
6047 xmlPopInput(ctxt);
6048 SKIP_BLANKS;
6049
6050 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006051 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006052 if (content != NULL) {
6053 xmlFreeDocElementContent(ctxt->myDoc, content);
6054 }
Owen Taylor3473f882001-02-23 17:55:21 +00006055 } else {
6056 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006057 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6058 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006059 }
6060
6061 NEXT;
6062 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006063 (ctxt->sax->elementDecl != NULL)) {
6064 if (content != NULL)
6065 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006066 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6067 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006068 if ((content != NULL) && (content->parent == NULL)) {
6069 /*
6070 * this is a trick: if xmlAddElementDecl is called,
6071 * instead of copying the full tree it is plugged directly
6072 * if called from the parser. Avoid duplicating the
6073 * interfaces or change the API/ABI
6074 */
6075 xmlFreeDocElementContent(ctxt->myDoc, content);
6076 }
6077 } else if (content != NULL) {
6078 xmlFreeDocElementContent(ctxt->myDoc, content);
6079 }
Owen Taylor3473f882001-02-23 17:55:21 +00006080 }
Owen Taylor3473f882001-02-23 17:55:21 +00006081 }
6082 return(ret);
6083}
6084
6085/**
Owen Taylor3473f882001-02-23 17:55:21 +00006086 * xmlParseConditionalSections
6087 * @ctxt: an XML parser context
6088 *
6089 * [61] conditionalSect ::= includeSect | ignoreSect
6090 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6091 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6092 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6093 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6094 */
6095
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006096static void
Owen Taylor3473f882001-02-23 17:55:21 +00006097xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6098 SKIP(3);
6099 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006100 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006101 SKIP(7);
6102 SKIP_BLANKS;
6103 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006104 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 } else {
6106 NEXT;
6107 }
6108 if (xmlParserDebugEntities) {
6109 if ((ctxt->input != NULL) && (ctxt->input->filename))
6110 xmlGenericError(xmlGenericErrorContext,
6111 "%s(%d): ", ctxt->input->filename,
6112 ctxt->input->line);
6113 xmlGenericError(xmlGenericErrorContext,
6114 "Entering INCLUDE Conditional Section\n");
6115 }
6116
6117 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6118 (NXT(2) != '>'))) {
6119 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006120 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006121
6122 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6123 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006124 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006125 NEXT;
6126 } else if (RAW == '%') {
6127 xmlParsePEReference(ctxt);
6128 } else
6129 xmlParseMarkupDecl(ctxt);
6130
6131 /*
6132 * Pop-up of finished entities.
6133 */
6134 while ((RAW == 0) && (ctxt->inputNr > 1))
6135 xmlPopInput(ctxt);
6136
Daniel Veillardfdc91562002-07-01 21:52:03 +00006137 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006138 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006139 break;
6140 }
6141 }
6142 if (xmlParserDebugEntities) {
6143 if ((ctxt->input != NULL) && (ctxt->input->filename))
6144 xmlGenericError(xmlGenericErrorContext,
6145 "%s(%d): ", ctxt->input->filename,
6146 ctxt->input->line);
6147 xmlGenericError(xmlGenericErrorContext,
6148 "Leaving INCLUDE Conditional Section\n");
6149 }
6150
Daniel Veillarda07050d2003-10-19 14:46:32 +00006151 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006152 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006153 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006154 int depth = 0;
6155
6156 SKIP(6);
6157 SKIP_BLANKS;
6158 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006159 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006160 } else {
6161 NEXT;
6162 }
6163 if (xmlParserDebugEntities) {
6164 if ((ctxt->input != NULL) && (ctxt->input->filename))
6165 xmlGenericError(xmlGenericErrorContext,
6166 "%s(%d): ", ctxt->input->filename,
6167 ctxt->input->line);
6168 xmlGenericError(xmlGenericErrorContext,
6169 "Entering IGNORE Conditional Section\n");
6170 }
6171
6172 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006173 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006174 * But disable SAX event generating DTD building in the meantime
6175 */
6176 state = ctxt->disableSAX;
6177 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006178 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006179 ctxt->instate = XML_PARSER_IGNORE;
6180
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006181 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006182 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6183 depth++;
6184 SKIP(3);
6185 continue;
6186 }
6187 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6188 if (--depth >= 0) SKIP(3);
6189 continue;
6190 }
6191 NEXT;
6192 continue;
6193 }
6194
6195 ctxt->disableSAX = state;
6196 ctxt->instate = instate;
6197
6198 if (xmlParserDebugEntities) {
6199 if ((ctxt->input != NULL) && (ctxt->input->filename))
6200 xmlGenericError(xmlGenericErrorContext,
6201 "%s(%d): ", ctxt->input->filename,
6202 ctxt->input->line);
6203 xmlGenericError(xmlGenericErrorContext,
6204 "Leaving IGNORE Conditional Section\n");
6205 }
6206
6207 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006208 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006209 }
6210
6211 if (RAW == 0)
6212 SHRINK;
6213
6214 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006215 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006216 } else {
6217 SKIP(3);
6218 }
6219}
6220
6221/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006222 * xmlParseMarkupDecl:
6223 * @ctxt: an XML parser context
6224 *
6225 * parse Markup declarations
6226 *
6227 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6228 * NotationDecl | PI | Comment
6229 *
6230 * [ VC: Proper Declaration/PE Nesting ]
6231 * Parameter-entity replacement text must be properly nested with
6232 * markup declarations. That is to say, if either the first character
6233 * or the last character of a markup declaration (markupdecl above) is
6234 * contained in the replacement text for a parameter-entity reference,
6235 * both must be contained in the same replacement text.
6236 *
6237 * [ WFC: PEs in Internal Subset ]
6238 * In the internal DTD subset, parameter-entity references can occur
6239 * only where markup declarations can occur, not within markup declarations.
6240 * (This does not apply to references that occur in external parameter
6241 * entities or to the external subset.)
6242 */
6243void
6244xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6245 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006246 if (CUR == '<') {
6247 if (NXT(1) == '!') {
6248 switch (NXT(2)) {
6249 case 'E':
6250 if (NXT(3) == 'L')
6251 xmlParseElementDecl(ctxt);
6252 else if (NXT(3) == 'N')
6253 xmlParseEntityDecl(ctxt);
6254 break;
6255 case 'A':
6256 xmlParseAttributeListDecl(ctxt);
6257 break;
6258 case 'N':
6259 xmlParseNotationDecl(ctxt);
6260 break;
6261 case '-':
6262 xmlParseComment(ctxt);
6263 break;
6264 default:
6265 /* there is an error but it will be detected later */
6266 break;
6267 }
6268 } else if (NXT(1) == '?') {
6269 xmlParsePI(ctxt);
6270 }
6271 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006272 /*
6273 * This is only for internal subset. On external entities,
6274 * the replacement is done before parsing stage
6275 */
6276 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6277 xmlParsePEReference(ctxt);
6278
6279 /*
6280 * Conditional sections are allowed from entities included
6281 * by PE References in the internal subset.
6282 */
6283 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6284 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6285 xmlParseConditionalSections(ctxt);
6286 }
6287 }
6288
6289 ctxt->instate = XML_PARSER_DTD;
6290}
6291
6292/**
6293 * xmlParseTextDecl:
6294 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006295 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006296 * parse an XML declaration header for external entities
6297 *
6298 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006299 */
6300
6301void
6302xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6303 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006304 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006305
6306 /*
6307 * We know that '<?xml' is here.
6308 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006309 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006310 SKIP(5);
6311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006312 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006313 return;
6314 }
6315
William M. Brack76e95df2003-10-18 16:20:14 +00006316 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6318 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006319 }
6320 SKIP_BLANKS;
6321
6322 /*
6323 * We may have the VersionInfo here.
6324 */
6325 version = xmlParseVersionInfo(ctxt);
6326 if (version == NULL)
6327 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006328 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006329 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006330 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6331 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006332 }
6333 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006334 ctxt->input->version = version;
6335
6336 /*
6337 * We must have the encoding declaration
6338 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006339 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006340 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6341 /*
6342 * The XML REC instructs us to stop parsing right here
6343 */
6344 return;
6345 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006346 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6347 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6348 "Missing encoding in text declaration\n");
6349 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006350
6351 SKIP_BLANKS;
6352 if ((RAW == '?') && (NXT(1) == '>')) {
6353 SKIP(2);
6354 } else if (RAW == '>') {
6355 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006356 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006357 NEXT;
6358 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006359 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006360 MOVETO_ENDTAG(CUR_PTR);
6361 NEXT;
6362 }
6363}
6364
6365/**
Owen Taylor3473f882001-02-23 17:55:21 +00006366 * xmlParseExternalSubset:
6367 * @ctxt: an XML parser context
6368 * @ExternalID: the external identifier
6369 * @SystemID: the system identifier (or URL)
6370 *
6371 * parse Markup declarations from an external subset
6372 *
6373 * [30] extSubset ::= textDecl? extSubsetDecl
6374 *
6375 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6376 */
6377void
6378xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6379 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006380 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006381 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006382
6383 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6384 (ctxt->input->end - ctxt->input->cur >= 4)) {
6385 xmlChar start[4];
6386 xmlCharEncoding enc;
6387
6388 start[0] = RAW;
6389 start[1] = NXT(1);
6390 start[2] = NXT(2);
6391 start[3] = NXT(3);
6392 enc = xmlDetectCharEncoding(start, 4);
6393 if (enc != XML_CHAR_ENCODING_NONE)
6394 xmlSwitchEncoding(ctxt, enc);
6395 }
6396
Daniel Veillarda07050d2003-10-19 14:46:32 +00006397 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006398 xmlParseTextDecl(ctxt);
6399 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6400 /*
6401 * The XML REC instructs us to stop parsing right here
6402 */
6403 ctxt->instate = XML_PARSER_EOF;
6404 return;
6405 }
6406 }
6407 if (ctxt->myDoc == NULL) {
6408 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006409 if (ctxt->myDoc == NULL) {
6410 xmlErrMemory(ctxt, "New Doc failed");
6411 return;
6412 }
6413 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006414 }
6415 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6416 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6417
6418 ctxt->instate = XML_PARSER_DTD;
6419 ctxt->external = 1;
6420 while (((RAW == '<') && (NXT(1) == '?')) ||
6421 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006422 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006423 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006424 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006425
6426 GROW;
6427 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6428 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006429 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006430 NEXT;
6431 } else if (RAW == '%') {
6432 xmlParsePEReference(ctxt);
6433 } else
6434 xmlParseMarkupDecl(ctxt);
6435
6436 /*
6437 * Pop-up of finished entities.
6438 */
6439 while ((RAW == 0) && (ctxt->inputNr > 1))
6440 xmlPopInput(ctxt);
6441
Daniel Veillardfdc91562002-07-01 21:52:03 +00006442 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006443 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006444 break;
6445 }
6446 }
6447
6448 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006449 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006450 }
6451
6452}
6453
6454/**
6455 * xmlParseReference:
6456 * @ctxt: an XML parser context
6457 *
6458 * parse and handle entity references in content, depending on the SAX
6459 * interface, this may end-up in a call to character() if this is a
6460 * CharRef, a predefined entity, if there is no reference() callback.
6461 * or if the parser was asked to switch to that mode.
6462 *
6463 * [67] Reference ::= EntityRef | CharRef
6464 */
6465void
6466xmlParseReference(xmlParserCtxtPtr ctxt) {
6467 xmlEntityPtr ent;
6468 xmlChar *val;
6469 if (RAW != '&') return;
6470
6471 if (NXT(1) == '#') {
6472 int i = 0;
6473 xmlChar out[10];
6474 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006475 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006476
Daniel Veillarddc171602008-03-26 17:41:38 +00006477 if (value == 0)
6478 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006479 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6480 /*
6481 * So we are using non-UTF-8 buffers
6482 * Check that the char fit on 8bits, if not
6483 * generate a CharRef.
6484 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006485 if (value <= 0xFF) {
6486 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006487 out[1] = 0;
6488 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6489 (!ctxt->disableSAX))
6490 ctxt->sax->characters(ctxt->userData, out, 1);
6491 } else {
6492 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006493 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006494 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006495 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006496 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6497 (!ctxt->disableSAX))
6498 ctxt->sax->reference(ctxt->userData, out);
6499 }
6500 } else {
6501 /*
6502 * Just encode the value in UTF-8
6503 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006504 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006505 out[i] = 0;
6506 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6507 (!ctxt->disableSAX))
6508 ctxt->sax->characters(ctxt->userData, out, i);
6509 }
6510 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006511 int was_checked;
6512
Owen Taylor3473f882001-02-23 17:55:21 +00006513 ent = xmlParseEntityRef(ctxt);
6514 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006515 if (!ctxt->wellFormed)
6516 return;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006517 ctxt->nbentities++;
Daniel Veillard8915c152008-08-26 13:05:34 +00006518 if ((ctxt->nbentities >= 100000) &&
6519 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006520 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6521 return;
6522 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006523 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006524 if ((ent->name != NULL) &&
6525 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6526 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006527 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006528
6529
6530 /*
6531 * The first reference to the entity trigger a parsing phase
6532 * where the ent->children is filled with the result from
6533 * the parsing.
6534 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006535 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006536 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006537
Owen Taylor3473f882001-02-23 17:55:21 +00006538 value = ent->content;
6539
6540 /*
6541 * Check that this entity is well formed
6542 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006543 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006544 (value[1] == 0) && (value[0] == '<') &&
6545 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6546 /*
6547 * DONE: get definite answer on this !!!
6548 * Lots of entity decls are used to declare a single
6549 * char
6550 * <!ENTITY lt "<">
6551 * Which seems to be valid since
6552 * 2.4: The ampersand character (&) and the left angle
6553 * bracket (<) may appear in their literal form only
6554 * when used ... They are also legal within the literal
6555 * entity value of an internal entity declaration;i
6556 * see "4.3.2 Well-Formed Parsed Entities".
6557 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6558 * Looking at the OASIS test suite and James Clark
6559 * tests, this is broken. However the XML REC uses
6560 * it. Is the XML REC not well-formed ????
6561 * This is a hack to avoid this problem
6562 *
6563 * ANSWER: since lt gt amp .. are already defined,
6564 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006565 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006566 * is lousy but acceptable.
6567 */
6568 list = xmlNewDocText(ctxt->myDoc, value);
6569 if (list != NULL) {
6570 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6571 (ent->children == NULL)) {
6572 ent->children = list;
6573 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006574 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006575 list->parent = (xmlNodePtr) ent;
6576 } else {
6577 xmlFreeNodeList(list);
6578 }
6579 } else if (list != NULL) {
6580 xmlFreeNodeList(list);
6581 }
6582 } else {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006583 unsigned long oldnbent = ctxt->nbentities;
Owen Taylor3473f882001-02-23 17:55:21 +00006584 /*
6585 * 4.3.2: An internal general parsed entity is well-formed
6586 * if its replacement text matches the production labeled
6587 * content.
6588 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006589
6590 void *user_data;
6591 /*
6592 * This is a bit hackish but this seems the best
6593 * way to make sure both SAX and DOM entity support
6594 * behaves okay.
6595 */
6596 if (ctxt->userData == ctxt)
6597 user_data = NULL;
6598 else
6599 user_data = ctxt->userData;
6600
Owen Taylor3473f882001-02-23 17:55:21 +00006601 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6602 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006603 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6604 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006605 ctxt->depth--;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006606
Owen Taylor3473f882001-02-23 17:55:21 +00006607 } else if (ent->etype ==
6608 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6609 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006610 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006611 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006612 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006613 ctxt->depth--;
6614 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006615 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006616 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6617 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006618 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006619 ent->checked = ctxt->nbentities - oldnbent;
Owen Taylor3473f882001-02-23 17:55:21 +00006620 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006621 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006622 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006623 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006624 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6625 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006626 (ent->children == NULL)) {
6627 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006628 if (ctxt->replaceEntities) {
6629 /*
6630 * Prune it directly in the generated document
6631 * except for single text nodes.
6632 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006633 if (((list->type == XML_TEXT_NODE) &&
6634 (list->next == NULL)) ||
6635 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006636 list->parent = (xmlNodePtr) ent;
6637 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006638 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006639 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006640 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006641 while (list != NULL) {
6642 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006643 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006644 if (list->next == NULL)
6645 ent->last = list;
6646 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006647 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006648 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006649#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006650 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6651 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006652#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006653 }
6654 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006655 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006656 while (list != NULL) {
6657 list->parent = (xmlNodePtr) ent;
6658 if (list->next == NULL)
6659 ent->last = list;
6660 list = list->next;
6661 }
Owen Taylor3473f882001-02-23 17:55:21 +00006662 }
6663 } else {
6664 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006665 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006666 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006667 } else if ((ret != XML_ERR_OK) &&
6668 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006669 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6670 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006671 } else if (list != NULL) {
6672 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006673 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006674 }
6675 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006676 if (ent->checked == 0)
6677 ent->checked = 1;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006678 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006679 ctxt->nbentities += ent->checked;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006680
6681 if (ent->children == NULL) {
6682 /*
6683 * Probably running in SAX mode and the callbacks don't
6684 * build the entity content. So unless we already went
6685 * though parsing for first checking go though the entity
6686 * content to generate callbacks associated to the entity
6687 */
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006688 if (was_checked != 0) {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006689 void *user_data;
6690 /*
6691 * This is a bit hackish but this seems the best
6692 * way to make sure both SAX and DOM entity support
6693 * behaves okay.
6694 */
6695 if (ctxt->userData == ctxt)
6696 user_data = NULL;
6697 else
6698 user_data = ctxt->userData;
6699
6700 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6701 ctxt->depth++;
6702 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6703 ent->content, user_data, NULL);
6704 ctxt->depth--;
6705 } else if (ent->etype ==
6706 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6707 ctxt->depth++;
6708 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6709 ctxt->sax, user_data, ctxt->depth,
6710 ent->URI, ent->ExternalID, NULL);
6711 ctxt->depth--;
6712 } else {
6713 ret = XML_ERR_ENTITY_PE_INTERNAL;
6714 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6715 "invalid entity type found\n", NULL);
6716 }
6717 if (ret == XML_ERR_ENTITY_LOOP) {
6718 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6719 return;
6720 }
6721 }
6722 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6723 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6724 /*
6725 * Entity reference callback comes second, it's somewhat
6726 * superfluous but a compatibility to historical behaviour
6727 */
6728 ctxt->sax->reference(ctxt->userData, ent->name);
6729 }
6730 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006731 }
6732 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006733 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006734 /*
6735 * Create a node.
6736 */
6737 ctxt->sax->reference(ctxt->userData, ent->name);
6738 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006739 }
6740 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006741 /*
6742 * There is a problem on the handling of _private for entities
6743 * (bug 155816): Should we copy the content of the field from
6744 * the entity (possibly overwriting some value set by the user
6745 * when a copy is created), should we leave it alone, or should
6746 * we try to take care of different situations? The problem
6747 * is exacerbated by the usage of this field by the xmlReader.
6748 * To fix this bug, we look at _private on the created node
6749 * and, if it's NULL, we copy in whatever was in the entity.
6750 * If it's not NULL we leave it alone. This is somewhat of a
6751 * hack - maybe we should have further tests to determine
6752 * what to do.
6753 */
Owen Taylor3473f882001-02-23 17:55:21 +00006754 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6755 /*
6756 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006757 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006758 * In the first occurrence list contains the replacement.
6759 * progressive == 2 means we are operating on the Reader
6760 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006761 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006762 if (((list == NULL) && (ent->owner == 0)) ||
6763 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006764 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006765
6766 /*
6767 * when operating on a reader, the entities definitions
6768 * are always owning the entities subtree.
6769 if (ctxt->parseMode == XML_PARSE_READER)
6770 ent->owner = 1;
6771 */
6772
Daniel Veillard62f313b2001-07-04 19:49:14 +00006773 cur = ent->children;
6774 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006775 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006776 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006777 if (nw->_private == NULL)
6778 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006779 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006780 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006781 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006782 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006783 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006784 if (cur == ent->last) {
6785 /*
6786 * needed to detect some strange empty
6787 * node cases in the reader tests
6788 */
6789 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006790 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006791 (nw->type == XML_ELEMENT_NODE) &&
6792 (nw->children == NULL))
6793 nw->extra = 1;
6794
Daniel Veillard62f313b2001-07-04 19:49:14 +00006795 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006796 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006797 cur = cur->next;
6798 }
Daniel Veillard81273902003-09-30 00:43:48 +00006799#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006800 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006801 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006802#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006803 } else if (list == NULL) {
6804 xmlNodePtr nw = NULL, cur, next, last,
6805 firstChild = NULL;
6806 /*
6807 * Copy the entity child list and make it the new
6808 * entity child list. The goal is to make sure any
6809 * ID or REF referenced will be the one from the
6810 * document content and not the entity copy.
6811 */
6812 cur = ent->children;
6813 ent->children = NULL;
6814 last = ent->last;
6815 ent->last = NULL;
6816 while (cur != NULL) {
6817 next = cur->next;
6818 cur->next = NULL;
6819 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006820 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006821 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006822 if (nw->_private == NULL)
6823 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006824 if (firstChild == NULL){
6825 firstChild = cur;
6826 }
6827 xmlAddChild((xmlNodePtr) ent, nw);
6828 xmlAddChild(ctxt->node, cur);
6829 }
6830 if (cur == last)
6831 break;
6832 cur = next;
6833 }
6834 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006835#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006836 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6837 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006838#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006839 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006840 const xmlChar *nbktext;
6841
Daniel Veillard62f313b2001-07-04 19:49:14 +00006842 /*
6843 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006844 * node with a possible previous text one which
6845 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006846 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006847 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6848 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006849 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006850 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006851 if ((ent->last != ent->children) &&
6852 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006853 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006854 xmlAddChildList(ctxt->node, ent->children);
6855 }
6856
Owen Taylor3473f882001-02-23 17:55:21 +00006857 /*
6858 * This is to avoid a nasty side effect, see
6859 * characters() in SAX.c
6860 */
6861 ctxt->nodemem = 0;
6862 ctxt->nodelen = 0;
6863 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006864 }
6865 }
6866 } else {
6867 val = ent->content;
6868 if (val == NULL) return;
6869 /*
6870 * inline the entity.
6871 */
6872 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6873 (!ctxt->disableSAX))
6874 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6875 }
6876 }
6877}
6878
6879/**
6880 * xmlParseEntityRef:
6881 * @ctxt: an XML parser context
6882 *
6883 * parse ENTITY references declarations
6884 *
6885 * [68] EntityRef ::= '&' Name ';'
6886 *
6887 * [ WFC: Entity Declared ]
6888 * In a document without any DTD, a document with only an internal DTD
6889 * subset which contains no parameter entity references, or a document
6890 * with "standalone='yes'", the Name given in the entity reference
6891 * must match that in an entity declaration, except that well-formed
6892 * documents need not declare any of the following entities: amp, lt,
6893 * gt, apos, quot. The declaration of a parameter entity must precede
6894 * any reference to it. Similarly, the declaration of a general entity
6895 * must precede any reference to it which appears in a default value in an
6896 * attribute-list declaration. Note that if entities are declared in the
6897 * external subset or in external parameter entities, a non-validating
6898 * processor is not obligated to read and process their declarations;
6899 * for such documents, the rule that an entity must be declared is a
6900 * well-formedness constraint only if standalone='yes'.
6901 *
6902 * [ WFC: Parsed Entity ]
6903 * An entity reference must not contain the name of an unparsed entity
6904 *
6905 * Returns the xmlEntityPtr if found, or NULL otherwise.
6906 */
6907xmlEntityPtr
6908xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006909 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006910 xmlEntityPtr ent = NULL;
6911
6912 GROW;
6913
6914 if (RAW == '&') {
6915 NEXT;
6916 name = xmlParseName(ctxt);
6917 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006918 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6919 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006920 } else {
6921 if (RAW == ';') {
6922 NEXT;
6923 /*
6924 * Ask first SAX for entity resolution, otherwise try the
6925 * predefined set.
6926 */
6927 if (ctxt->sax != NULL) {
6928 if (ctxt->sax->getEntity != NULL)
6929 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006930 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006931 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006932 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6933 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006934 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006935 }
Owen Taylor3473f882001-02-23 17:55:21 +00006936 }
6937 /*
6938 * [ WFC: Entity Declared ]
6939 * In a document without any DTD, a document with only an
6940 * internal DTD subset which contains no parameter entity
6941 * references, or a document with "standalone='yes'", the
6942 * Name given in the entity reference must match that in an
6943 * entity declaration, except that well-formed documents
6944 * need not declare any of the following entities: amp, lt,
6945 * gt, apos, quot.
6946 * The declaration of a parameter entity must precede any
6947 * reference to it.
6948 * Similarly, the declaration of a general entity must
6949 * precede any reference to it which appears in a default
6950 * value in an attribute-list declaration. Note that if
6951 * entities are declared in the external subset or in
6952 * external parameter entities, a non-validating processor
6953 * is not obligated to read and process their declarations;
6954 * for such documents, the rule that an entity must be
6955 * declared is a well-formedness constraint only if
6956 * standalone='yes'.
6957 */
6958 if (ent == NULL) {
6959 if ((ctxt->standalone == 1) ||
6960 ((ctxt->hasExternalSubset == 0) &&
6961 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006962 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006963 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006964 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006965 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006966 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006967 if ((ctxt->inSubset == 0) &&
6968 (ctxt->sax != NULL) &&
6969 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006970 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006971 }
Owen Taylor3473f882001-02-23 17:55:21 +00006972 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006973 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006974 }
6975
6976 /*
6977 * [ WFC: Parsed Entity ]
6978 * An entity reference must not contain the name of an
6979 * unparsed entity
6980 */
6981 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006982 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006983 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006984 }
6985
6986 /*
6987 * [ WFC: No External Entity References ]
6988 * Attribute values cannot contain direct or indirect
6989 * entity references to external entities.
6990 */
6991 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6992 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006993 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6994 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006995 }
6996 /*
6997 * [ WFC: No < in Attribute Values ]
6998 * The replacement text of any entity referred to directly or
6999 * indirectly in an attribute value (other than "&lt;") must
7000 * not contain a <.
7001 */
7002 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7003 (ent != NULL) &&
7004 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7005 (ent->content != NULL) &&
7006 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007007 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00007008 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007009 }
7010
7011 /*
7012 * Internal check, no parameter entities here ...
7013 */
7014 else {
7015 switch (ent->etype) {
7016 case XML_INTERNAL_PARAMETER_ENTITY:
7017 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007018 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7019 "Attempt to reference the parameter entity '%s'\n",
7020 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007021 break;
7022 default:
7023 break;
7024 }
7025 }
7026
7027 /*
7028 * [ WFC: No Recursion ]
7029 * A parsed entity must not contain a recursive reference
7030 * to itself, either directly or indirectly.
7031 * Done somewhere else
7032 */
7033
7034 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007035 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007036 }
Owen Taylor3473f882001-02-23 17:55:21 +00007037 }
7038 }
7039 return(ent);
7040}
7041
7042/**
7043 * xmlParseStringEntityRef:
7044 * @ctxt: an XML parser context
7045 * @str: a pointer to an index in the string
7046 *
7047 * parse ENTITY references declarations, but this version parses it from
7048 * a string value.
7049 *
7050 * [68] EntityRef ::= '&' Name ';'
7051 *
7052 * [ WFC: Entity Declared ]
7053 * In a document without any DTD, a document with only an internal DTD
7054 * subset which contains no parameter entity references, or a document
7055 * with "standalone='yes'", the Name given in the entity reference
7056 * must match that in an entity declaration, except that well-formed
7057 * documents need not declare any of the following entities: amp, lt,
7058 * gt, apos, quot. The declaration of a parameter entity must precede
7059 * any reference to it. Similarly, the declaration of a general entity
7060 * must precede any reference to it which appears in a default value in an
7061 * attribute-list declaration. Note that if entities are declared in the
7062 * external subset or in external parameter entities, a non-validating
7063 * processor is not obligated to read and process their declarations;
7064 * for such documents, the rule that an entity must be declared is a
7065 * well-formedness constraint only if standalone='yes'.
7066 *
7067 * [ WFC: Parsed Entity ]
7068 * An entity reference must not contain the name of an unparsed entity
7069 *
7070 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7071 * is updated to the current location in the string.
7072 */
7073xmlEntityPtr
7074xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7075 xmlChar *name;
7076 const xmlChar *ptr;
7077 xmlChar cur;
7078 xmlEntityPtr ent = NULL;
7079
7080 if ((str == NULL) || (*str == NULL))
7081 return(NULL);
7082 ptr = *str;
7083 cur = *ptr;
7084 if (cur == '&') {
7085 ptr++;
7086 cur = *ptr;
7087 name = xmlParseStringName(ctxt, &ptr);
7088 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007089 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7090 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007091 } else {
7092 if (*ptr == ';') {
7093 ptr++;
7094 /*
7095 * Ask first SAX for entity resolution, otherwise try the
7096 * predefined set.
7097 */
7098 if (ctxt->sax != NULL) {
7099 if (ctxt->sax->getEntity != NULL)
7100 ent = ctxt->sax->getEntity(ctxt->userData, name);
7101 if (ent == NULL)
7102 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007103 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00007104 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007105 }
Owen Taylor3473f882001-02-23 17:55:21 +00007106 }
7107 /*
7108 * [ WFC: Entity Declared ]
7109 * In a document without any DTD, a document with only an
7110 * internal DTD subset which contains no parameter entity
7111 * references, or a document with "standalone='yes'", the
7112 * Name given in the entity reference must match that in an
7113 * entity declaration, except that well-formed documents
7114 * need not declare any of the following entities: amp, lt,
7115 * gt, apos, quot.
7116 * The declaration of a parameter entity must precede any
7117 * reference to it.
7118 * Similarly, the declaration of a general entity must
7119 * precede any reference to it which appears in a default
7120 * value in an attribute-list declaration. Note that if
7121 * entities are declared in the external subset or in
7122 * external parameter entities, a non-validating processor
7123 * is not obligated to read and process their declarations;
7124 * for such documents, the rule that an entity must be
7125 * declared is a well-formedness constraint only if
7126 * standalone='yes'.
7127 */
7128 if (ent == NULL) {
7129 if ((ctxt->standalone == 1) ||
7130 ((ctxt->hasExternalSubset == 0) &&
7131 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007132 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007133 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007134 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007135 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00007136 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007137 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007138 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00007139 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00007140 }
7141
7142 /*
7143 * [ WFC: Parsed Entity ]
7144 * An entity reference must not contain the name of an
7145 * unparsed entity
7146 */
7147 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007148 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007149 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007150 }
7151
7152 /*
7153 * [ WFC: No External Entity References ]
7154 * Attribute values cannot contain direct or indirect
7155 * entity references to external entities.
7156 */
7157 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7158 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007159 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00007160 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007161 }
7162 /*
7163 * [ WFC: No < in Attribute Values ]
7164 * The replacement text of any entity referred to directly or
7165 * indirectly in an attribute value (other than "&lt;") must
7166 * not contain a <.
7167 */
7168 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7169 (ent != NULL) &&
7170 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7171 (ent->content != NULL) &&
7172 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007173 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7174 "'<' in entity '%s' is not allowed in attributes values\n",
7175 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007176 }
7177
7178 /*
7179 * Internal check, no parameter entities here ...
7180 */
7181 else {
7182 switch (ent->etype) {
7183 case XML_INTERNAL_PARAMETER_ENTITY:
7184 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00007185 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7186 "Attempt to reference the parameter entity '%s'\n",
7187 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007188 break;
7189 default:
7190 break;
7191 }
7192 }
7193
7194 /*
7195 * [ WFC: No Recursion ]
7196 * A parsed entity must not contain a recursive reference
7197 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007198 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00007199 */
7200
7201 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007202 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007203 }
7204 xmlFree(name);
7205 }
7206 }
7207 *str = ptr;
7208 return(ent);
7209}
7210
7211/**
7212 * xmlParsePEReference:
7213 * @ctxt: an XML parser context
7214 *
7215 * parse PEReference declarations
7216 * The entity content is handled directly by pushing it's content as
7217 * a new input stream.
7218 *
7219 * [69] PEReference ::= '%' Name ';'
7220 *
7221 * [ WFC: No Recursion ]
7222 * A parsed entity must not contain a recursive
7223 * reference to itself, either directly or indirectly.
7224 *
7225 * [ WFC: Entity Declared ]
7226 * In a document without any DTD, a document with only an internal DTD
7227 * subset which contains no parameter entity references, or a document
7228 * with "standalone='yes'", ... ... The declaration of a parameter
7229 * entity must precede any reference to it...
7230 *
7231 * [ VC: Entity Declared ]
7232 * In a document with an external subset or external parameter entities
7233 * with "standalone='no'", ... ... The declaration of a parameter entity
7234 * must precede any reference to it...
7235 *
7236 * [ WFC: In DTD ]
7237 * Parameter-entity references may only appear in the DTD.
7238 * NOTE: misleading but this is handled.
7239 */
7240void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007241xmlParsePEReference(xmlParserCtxtPtr ctxt)
7242{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007243 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007244 xmlEntityPtr entity = NULL;
7245 xmlParserInputPtr input;
7246
7247 if (RAW == '%') {
7248 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00007249 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00007250 if (name == NULL) {
7251 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7252 "xmlParsePEReference: no name\n");
7253 } else {
7254 if (RAW == ';') {
7255 NEXT;
7256 if ((ctxt->sax != NULL) &&
7257 (ctxt->sax->getParameterEntity != NULL))
7258 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7259 name);
7260 if (entity == NULL) {
7261 /*
7262 * [ WFC: Entity Declared ]
7263 * In a document without any DTD, a document with only an
7264 * internal DTD subset which contains no parameter entity
7265 * references, or a document with "standalone='yes'", ...
7266 * ... The declaration of a parameter entity must precede
7267 * any reference to it...
7268 */
7269 if ((ctxt->standalone == 1) ||
7270 ((ctxt->hasExternalSubset == 0) &&
7271 (ctxt->hasPErefs == 0))) {
7272 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7273 "PEReference: %%%s; not found\n",
7274 name);
7275 } else {
7276 /*
7277 * [ VC: Entity Declared ]
7278 * In a document with an external subset or external
7279 * parameter entities with "standalone='no'", ...
7280 * ... The declaration of a parameter entity must
7281 * precede any reference to it...
7282 */
7283 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7284 "PEReference: %%%s; not found\n",
7285 name, NULL);
7286 ctxt->valid = 0;
7287 }
7288 } else {
7289 /*
7290 * Internal checking in case the entity quest barfed
7291 */
7292 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7293 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7294 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7295 "Internal: %%%s; is not a parameter entity\n",
7296 name, NULL);
7297 } else if (ctxt->input->free != deallocblankswrapper) {
7298 input =
7299 xmlNewBlanksWrapperInputStream(ctxt, entity);
7300 xmlPushInput(ctxt, input);
7301 } else {
7302 /*
7303 * TODO !!!
7304 * handle the extra spaces added before and after
7305 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7306 */
7307 input = xmlNewEntityInputStream(ctxt, entity);
7308 xmlPushInput(ctxt, input);
7309 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00007310 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00007311 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00007312 xmlParseTextDecl(ctxt);
7313 if (ctxt->errNo ==
7314 XML_ERR_UNSUPPORTED_ENCODING) {
7315 /*
7316 * The XML REC instructs us to stop parsing
7317 * right here
7318 */
7319 ctxt->instate = XML_PARSER_EOF;
7320 return;
7321 }
7322 }
7323 }
7324 }
7325 ctxt->hasPErefs = 1;
7326 } else {
7327 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7328 }
7329 }
Owen Taylor3473f882001-02-23 17:55:21 +00007330 }
7331}
7332
7333/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007334 * xmlLoadEntityContent:
7335 * @ctxt: an XML parser context
7336 * @entity: an unloaded system entity
7337 *
7338 * Load the original content of the given system entity from the
7339 * ExternalID/SystemID given. This is to be used for Included in Literal
7340 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7341 *
7342 * Returns 0 in case of success and -1 in case of failure
7343 */
7344static int
7345xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7346 xmlParserInputPtr input;
7347 xmlBufferPtr buf;
7348 int l, c;
7349 int count = 0;
7350
7351 if ((ctxt == NULL) || (entity == NULL) ||
7352 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7353 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7354 (entity->content != NULL)) {
7355 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7356 "xmlLoadEntityContent parameter error");
7357 return(-1);
7358 }
7359
7360 if (xmlParserDebugEntities)
7361 xmlGenericError(xmlGenericErrorContext,
7362 "Reading %s entity content input\n", entity->name);
7363
7364 buf = xmlBufferCreate();
7365 if (buf == NULL) {
7366 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7367 "xmlLoadEntityContent parameter error");
7368 return(-1);
7369 }
7370
7371 input = xmlNewEntityInputStream(ctxt, entity);
7372 if (input == NULL) {
7373 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7374 "xmlLoadEntityContent input error");
7375 xmlBufferFree(buf);
7376 return(-1);
7377 }
7378
7379 /*
7380 * Push the entity as the current input, read char by char
7381 * saving to the buffer until the end of the entity or an error
7382 */
7383 xmlPushInput(ctxt, input);
7384 GROW;
7385 c = CUR_CHAR(l);
7386 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7387 (IS_CHAR(c))) {
7388 xmlBufferAdd(buf, ctxt->input->cur, l);
7389 if (count++ > 100) {
7390 count = 0;
7391 GROW;
7392 }
7393 NEXTL(l);
7394 c = CUR_CHAR(l);
7395 }
7396
7397 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7398 xmlPopInput(ctxt);
7399 } else if (!IS_CHAR(c)) {
7400 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7401 "xmlLoadEntityContent: invalid char value %d\n",
7402 c);
7403 xmlBufferFree(buf);
7404 return(-1);
7405 }
7406 entity->content = buf->content;
7407 buf->content = NULL;
7408 xmlBufferFree(buf);
7409
7410 return(0);
7411}
7412
7413/**
Owen Taylor3473f882001-02-23 17:55:21 +00007414 * xmlParseStringPEReference:
7415 * @ctxt: an XML parser context
7416 * @str: a pointer to an index in the string
7417 *
7418 * parse PEReference declarations
7419 *
7420 * [69] PEReference ::= '%' Name ';'
7421 *
7422 * [ WFC: No Recursion ]
7423 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007424 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007425 *
7426 * [ WFC: Entity Declared ]
7427 * In a document without any DTD, a document with only an internal DTD
7428 * subset which contains no parameter entity references, or a document
7429 * with "standalone='yes'", ... ... The declaration of a parameter
7430 * entity must precede any reference to it...
7431 *
7432 * [ VC: Entity Declared ]
7433 * In a document with an external subset or external parameter entities
7434 * with "standalone='no'", ... ... The declaration of a parameter entity
7435 * must precede any reference to it...
7436 *
7437 * [ WFC: In DTD ]
7438 * Parameter-entity references may only appear in the DTD.
7439 * NOTE: misleading but this is handled.
7440 *
7441 * Returns the string of the entity content.
7442 * str is updated to the current value of the index
7443 */
7444xmlEntityPtr
7445xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7446 const xmlChar *ptr;
7447 xmlChar cur;
7448 xmlChar *name;
7449 xmlEntityPtr entity = NULL;
7450
7451 if ((str == NULL) || (*str == NULL)) return(NULL);
7452 ptr = *str;
7453 cur = *ptr;
7454 if (cur == '%') {
7455 ptr++;
7456 cur = *ptr;
7457 name = xmlParseStringName(ctxt, &ptr);
7458 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007459 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7460 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007461 } else {
7462 cur = *ptr;
7463 if (cur == ';') {
7464 ptr++;
7465 cur = *ptr;
7466 if ((ctxt->sax != NULL) &&
7467 (ctxt->sax->getParameterEntity != NULL))
7468 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7469 name);
7470 if (entity == NULL) {
7471 /*
7472 * [ WFC: Entity Declared ]
7473 * In a document without any DTD, a document with only an
7474 * internal DTD subset which contains no parameter entity
7475 * references, or a document with "standalone='yes'", ...
7476 * ... The declaration of a parameter entity must precede
7477 * any reference to it...
7478 */
7479 if ((ctxt->standalone == 1) ||
7480 ((ctxt->hasExternalSubset == 0) &&
7481 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007482 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007483 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007484 } else {
7485 /*
7486 * [ VC: Entity Declared ]
7487 * In a document with an external subset or external
7488 * parameter entities with "standalone='no'", ...
7489 * ... The declaration of a parameter entity must
7490 * precede any reference to it...
7491 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007492 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7493 "PEReference: %%%s; not found\n",
7494 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007495 ctxt->valid = 0;
7496 }
7497 } else {
7498 /*
7499 * Internal checking in case the entity quest barfed
7500 */
7501 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7502 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007503 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7504 "%%%s; is not a parameter entity\n",
7505 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007506 }
7507 }
7508 ctxt->hasPErefs = 1;
7509 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007510 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007511 }
7512 xmlFree(name);
7513 }
7514 }
7515 *str = ptr;
7516 return(entity);
7517}
7518
7519/**
7520 * xmlParseDocTypeDecl:
7521 * @ctxt: an XML parser context
7522 *
7523 * parse a DOCTYPE declaration
7524 *
7525 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7526 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7527 *
7528 * [ VC: Root Element Type ]
7529 * The Name in the document type declaration must match the element
7530 * type of the root element.
7531 */
7532
7533void
7534xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007535 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007536 xmlChar *ExternalID = NULL;
7537 xmlChar *URI = NULL;
7538
7539 /*
7540 * We know that '<!DOCTYPE' has been detected.
7541 */
7542 SKIP(9);
7543
7544 SKIP_BLANKS;
7545
7546 /*
7547 * Parse the DOCTYPE name.
7548 */
7549 name = xmlParseName(ctxt);
7550 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007551 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7552 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007553 }
7554 ctxt->intSubName = name;
7555
7556 SKIP_BLANKS;
7557
7558 /*
7559 * Check for SystemID and ExternalID
7560 */
7561 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7562
7563 if ((URI != NULL) || (ExternalID != NULL)) {
7564 ctxt->hasExternalSubset = 1;
7565 }
7566 ctxt->extSubURI = URI;
7567 ctxt->extSubSystem = ExternalID;
7568
7569 SKIP_BLANKS;
7570
7571 /*
7572 * Create and update the internal subset.
7573 */
7574 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7575 (!ctxt->disableSAX))
7576 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7577
7578 /*
7579 * Is there any internal subset declarations ?
7580 * they are handled separately in xmlParseInternalSubset()
7581 */
7582 if (RAW == '[')
7583 return;
7584
7585 /*
7586 * We should be at the end of the DOCTYPE declaration.
7587 */
7588 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007589 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007590 }
7591 NEXT;
7592}
7593
7594/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007595 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007596 * @ctxt: an XML parser context
7597 *
7598 * parse the internal subset declaration
7599 *
7600 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7601 */
7602
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007603static void
Owen Taylor3473f882001-02-23 17:55:21 +00007604xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7605 /*
7606 * Is there any DTD definition ?
7607 */
7608 if (RAW == '[') {
7609 ctxt->instate = XML_PARSER_DTD;
7610 NEXT;
7611 /*
7612 * Parse the succession of Markup declarations and
7613 * PEReferences.
7614 * Subsequence (markupdecl | PEReference | S)*
7615 */
7616 while (RAW != ']') {
7617 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007618 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007619
7620 SKIP_BLANKS;
7621 xmlParseMarkupDecl(ctxt);
7622 xmlParsePEReference(ctxt);
7623
7624 /*
7625 * Pop-up of finished entities.
7626 */
7627 while ((RAW == 0) && (ctxt->inputNr > 1))
7628 xmlPopInput(ctxt);
7629
7630 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007631 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007632 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007633 break;
7634 }
7635 }
7636 if (RAW == ']') {
7637 NEXT;
7638 SKIP_BLANKS;
7639 }
7640 }
7641
7642 /*
7643 * We should be at the end of the DOCTYPE declaration.
7644 */
7645 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007646 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007647 }
7648 NEXT;
7649}
7650
Daniel Veillard81273902003-09-30 00:43:48 +00007651#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007652/**
7653 * xmlParseAttribute:
7654 * @ctxt: an XML parser context
7655 * @value: a xmlChar ** used to store the value of the attribute
7656 *
7657 * parse an attribute
7658 *
7659 * [41] Attribute ::= Name Eq AttValue
7660 *
7661 * [ WFC: No External Entity References ]
7662 * Attribute values cannot contain direct or indirect entity references
7663 * to external entities.
7664 *
7665 * [ WFC: No < in Attribute Values ]
7666 * The replacement text of any entity referred to directly or indirectly in
7667 * an attribute value (other than "&lt;") must not contain a <.
7668 *
7669 * [ VC: Attribute Value Type ]
7670 * The attribute must have been declared; the value must be of the type
7671 * declared for it.
7672 *
7673 * [25] Eq ::= S? '=' S?
7674 *
7675 * With namespace:
7676 *
7677 * [NS 11] Attribute ::= QName Eq AttValue
7678 *
7679 * Also the case QName == xmlns:??? is handled independently as a namespace
7680 * definition.
7681 *
7682 * Returns the attribute name, and the value in *value.
7683 */
7684
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007685const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007686xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007687 const xmlChar *name;
7688 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007689
7690 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007691 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007692 name = xmlParseName(ctxt);
7693 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007694 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007695 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007696 return(NULL);
7697 }
7698
7699 /*
7700 * read the value
7701 */
7702 SKIP_BLANKS;
7703 if (RAW == '=') {
7704 NEXT;
7705 SKIP_BLANKS;
7706 val = xmlParseAttValue(ctxt);
7707 ctxt->instate = XML_PARSER_CONTENT;
7708 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007709 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007710 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007711 return(NULL);
7712 }
7713
7714 /*
7715 * Check that xml:lang conforms to the specification
7716 * No more registered as an error, just generate a warning now
7717 * since this was deprecated in XML second edition
7718 */
7719 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7720 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007721 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7722 "Malformed value for xml:lang : %s\n",
7723 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007724 }
7725 }
7726
7727 /*
7728 * Check that xml:space conforms to the specification
7729 */
7730 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7731 if (xmlStrEqual(val, BAD_CAST "default"))
7732 *(ctxt->space) = 0;
7733 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7734 *(ctxt->space) = 1;
7735 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007736 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007737"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007738 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007739 }
7740 }
7741
7742 *value = val;
7743 return(name);
7744}
7745
7746/**
7747 * xmlParseStartTag:
7748 * @ctxt: an XML parser context
7749 *
7750 * parse a start of tag either for rule element or
7751 * EmptyElement. In both case we don't parse the tag closing chars.
7752 *
7753 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7754 *
7755 * [ WFC: Unique Att Spec ]
7756 * No attribute name may appear more than once in the same start-tag or
7757 * empty-element tag.
7758 *
7759 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7760 *
7761 * [ WFC: Unique Att Spec ]
7762 * No attribute name may appear more than once in the same start-tag or
7763 * empty-element tag.
7764 *
7765 * With namespace:
7766 *
7767 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7768 *
7769 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7770 *
7771 * Returns the element name parsed
7772 */
7773
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007774const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007775xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007776 const xmlChar *name;
7777 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007778 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007779 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007780 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007781 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007782 int i;
7783
7784 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007785 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007786
7787 name = xmlParseName(ctxt);
7788 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007789 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007790 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007791 return(NULL);
7792 }
7793
7794 /*
7795 * Now parse the attributes, it ends up with the ending
7796 *
7797 * (S Attribute)* S?
7798 */
7799 SKIP_BLANKS;
7800 GROW;
7801
Daniel Veillard21a0f912001-02-25 19:54:14 +00007802 while ((RAW != '>') &&
7803 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007804 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007805 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007806 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007807
7808 attname = xmlParseAttribute(ctxt, &attvalue);
7809 if ((attname != NULL) && (attvalue != NULL)) {
7810 /*
7811 * [ WFC: Unique Att Spec ]
7812 * No attribute name may appear more than once in the same
7813 * start-tag or empty-element tag.
7814 */
7815 for (i = 0; i < nbatts;i += 2) {
7816 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007817 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007818 xmlFree(attvalue);
7819 goto failed;
7820 }
7821 }
Owen Taylor3473f882001-02-23 17:55:21 +00007822 /*
7823 * Add the pair to atts
7824 */
7825 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007826 maxatts = 22; /* allow for 10 attrs by default */
7827 atts = (const xmlChar **)
7828 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007829 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007830 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007831 if (attvalue != NULL)
7832 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007833 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007834 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007835 ctxt->atts = atts;
7836 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007837 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007838 const xmlChar **n;
7839
Owen Taylor3473f882001-02-23 17:55:21 +00007840 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007841 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007842 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007843 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007844 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007845 if (attvalue != NULL)
7846 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007847 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007848 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007849 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007850 ctxt->atts = atts;
7851 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007852 }
7853 atts[nbatts++] = attname;
7854 atts[nbatts++] = attvalue;
7855 atts[nbatts] = NULL;
7856 atts[nbatts + 1] = NULL;
7857 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007858 if (attvalue != NULL)
7859 xmlFree(attvalue);
7860 }
7861
7862failed:
7863
Daniel Veillard3772de32002-12-17 10:31:45 +00007864 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007865 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7866 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007867 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7869 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007870 }
7871 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007872 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7873 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007874 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7875 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007876 break;
7877 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007878 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007879 GROW;
7880 }
7881
7882 /*
7883 * SAX: Start of Element !
7884 */
7885 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007886 (!ctxt->disableSAX)) {
7887 if (nbatts > 0)
7888 ctxt->sax->startElement(ctxt->userData, name, atts);
7889 else
7890 ctxt->sax->startElement(ctxt->userData, name, NULL);
7891 }
Owen Taylor3473f882001-02-23 17:55:21 +00007892
7893 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007894 /* Free only the content strings */
7895 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007896 if (atts[i] != NULL)
7897 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007898 }
7899 return(name);
7900}
7901
7902/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007903 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007904 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007905 * @line: line of the start tag
7906 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007907 *
7908 * parse an end of tag
7909 *
7910 * [42] ETag ::= '</' Name S? '>'
7911 *
7912 * With namespace
7913 *
7914 * [NS 9] ETag ::= '</' QName S? '>'
7915 */
7916
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007917static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007918xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007919 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007920
7921 GROW;
7922 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007923 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007924 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007925 return;
7926 }
7927 SKIP(2);
7928
Daniel Veillard46de64e2002-05-29 08:21:33 +00007929 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007930
7931 /*
7932 * We should definitely be at the ending "S? '>'" part
7933 */
7934 GROW;
7935 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007936 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007937 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007938 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007939 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007940
7941 /*
7942 * [ WFC: Element Type Match ]
7943 * The Name in an element's end-tag must match the element type in the
7944 * start-tag.
7945 *
7946 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007947 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007948 if (name == NULL) name = BAD_CAST "unparseable";
7949 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007950 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007951 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007952 }
7953
7954 /*
7955 * SAX: End of Tag
7956 */
7957 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7958 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007959 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007960
Daniel Veillarde57ec792003-09-10 10:50:59 +00007961 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007962 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007963 return;
7964}
7965
7966/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007967 * xmlParseEndTag:
7968 * @ctxt: an XML parser context
7969 *
7970 * parse an end of tag
7971 *
7972 * [42] ETag ::= '</' Name S? '>'
7973 *
7974 * With namespace
7975 *
7976 * [NS 9] ETag ::= '</' QName S? '>'
7977 */
7978
7979void
7980xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007981 xmlParseEndTag1(ctxt, 0);
7982}
Daniel Veillard81273902003-09-30 00:43:48 +00007983#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984
7985/************************************************************************
7986 * *
7987 * SAX 2 specific operations *
7988 * *
7989 ************************************************************************/
7990
Daniel Veillard0fb18932003-09-07 09:14:37 +00007991/*
7992 * xmlGetNamespace:
7993 * @ctxt: an XML parser context
7994 * @prefix: the prefix to lookup
7995 *
7996 * Lookup the namespace name for the @prefix (which ca be NULL)
7997 * The prefix must come from the @ctxt->dict dictionnary
7998 *
7999 * Returns the namespace name or NULL if not bound
8000 */
8001static const xmlChar *
8002xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8003 int i;
8004
Daniel Veillarde57ec792003-09-10 10:50:59 +00008005 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008006 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008007 if (ctxt->nsTab[i] == prefix) {
8008 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8009 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008010 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008011 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008012 return(NULL);
8013}
8014
8015/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 * xmlParseQName:
8017 * @ctxt: an XML parser context
8018 * @prefix: pointer to store the prefix part
8019 *
8020 * parse an XML Namespace QName
8021 *
8022 * [6] QName ::= (Prefix ':')? LocalPart
8023 * [7] Prefix ::= NCName
8024 * [8] LocalPart ::= NCName
8025 *
8026 * Returns the Name parsed or NULL
8027 */
8028
8029static const xmlChar *
8030xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8031 const xmlChar *l, *p;
8032
8033 GROW;
8034
8035 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008036 if (l == NULL) {
8037 if (CUR == ':') {
8038 l = xmlParseName(ctxt);
8039 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008040 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8041 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008042 *prefix = NULL;
8043 return(l);
8044 }
8045 }
8046 return(NULL);
8047 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008048 if (CUR == ':') {
8049 NEXT;
8050 p = l;
8051 l = xmlParseNCName(ctxt);
8052 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008053 xmlChar *tmp;
8054
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008055 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8056 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008057 l = xmlParseNmtoken(ctxt);
8058 if (l == NULL)
8059 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8060 else {
8061 tmp = xmlBuildQName(l, p, NULL, 0);
8062 xmlFree((char *)l);
8063 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008064 p = xmlDictLookup(ctxt->dict, tmp, -1);
8065 if (tmp != NULL) xmlFree(tmp);
8066 *prefix = NULL;
8067 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008068 }
8069 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008070 xmlChar *tmp;
8071
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008072 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8073 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008074 NEXT;
8075 tmp = (xmlChar *) xmlParseName(ctxt);
8076 if (tmp != NULL) {
8077 tmp = xmlBuildQName(tmp, l, NULL, 0);
8078 l = xmlDictLookup(ctxt->dict, tmp, -1);
8079 if (tmp != NULL) xmlFree(tmp);
8080 *prefix = p;
8081 return(l);
8082 }
8083 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8084 l = xmlDictLookup(ctxt->dict, tmp, -1);
8085 if (tmp != NULL) xmlFree(tmp);
8086 *prefix = p;
8087 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008088 }
8089 *prefix = p;
8090 } else
8091 *prefix = NULL;
8092 return(l);
8093}
8094
8095/**
8096 * xmlParseQNameAndCompare:
8097 * @ctxt: an XML parser context
8098 * @name: the localname
8099 * @prefix: the prefix, if any.
8100 *
8101 * parse an XML name and compares for match
8102 * (specialized for endtag parsing)
8103 *
8104 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8105 * and the name for mismatch
8106 */
8107
8108static const xmlChar *
8109xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8110 xmlChar const *prefix) {
8111 const xmlChar *cmp = name;
8112 const xmlChar *in;
8113 const xmlChar *ret;
8114 const xmlChar *prefix2;
8115
8116 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8117
8118 GROW;
8119 in = ctxt->input->cur;
8120
8121 cmp = prefix;
8122 while (*in != 0 && *in == *cmp) {
8123 ++in;
8124 ++cmp;
8125 }
8126 if ((*cmp == 0) && (*in == ':')) {
8127 in++;
8128 cmp = name;
8129 while (*in != 0 && *in == *cmp) {
8130 ++in;
8131 ++cmp;
8132 }
William M. Brack76e95df2003-10-18 16:20:14 +00008133 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008134 /* success */
8135 ctxt->input->cur = in;
8136 return((const xmlChar*) 1);
8137 }
8138 }
8139 /*
8140 * all strings coms from the dictionary, equality can be done directly
8141 */
8142 ret = xmlParseQName (ctxt, &prefix2);
8143 if ((ret == name) && (prefix == prefix2))
8144 return((const xmlChar*) 1);
8145 return ret;
8146}
8147
8148/**
8149 * xmlParseAttValueInternal:
8150 * @ctxt: an XML parser context
8151 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008152 * @alloc: whether the attribute was reallocated as a new string
8153 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008154 *
8155 * parse a value for an attribute.
8156 * NOTE: if no normalization is needed, the routine will return pointers
8157 * directly from the data buffer.
8158 *
8159 * 3.3.3 Attribute-Value Normalization:
8160 * Before the value of an attribute is passed to the application or
8161 * checked for validity, the XML processor must normalize it as follows:
8162 * - a character reference is processed by appending the referenced
8163 * character to the attribute value
8164 * - an entity reference is processed by recursively processing the
8165 * replacement text of the entity
8166 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8167 * appending #x20 to the normalized value, except that only a single
8168 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8169 * parsed entity or the literal entity value of an internal parsed entity
8170 * - other characters are processed by appending them to the normalized value
8171 * If the declared value is not CDATA, then the XML processor must further
8172 * process the normalized attribute value by discarding any leading and
8173 * trailing space (#x20) characters, and by replacing sequences of space
8174 * (#x20) characters by a single space (#x20) character.
8175 * All attributes for which no declaration has been read should be treated
8176 * by a non-validating parser as if declared CDATA.
8177 *
8178 * Returns the AttValue parsed or NULL. The value has to be freed by the
8179 * caller if it was copied, this can be detected by val[*len] == 0.
8180 */
8181
8182static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008183xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8184 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008185{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008186 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008187 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008188 xmlChar *ret = NULL;
8189
8190 GROW;
8191 in = (xmlChar *) CUR_PTR;
8192 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008193 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008194 return (NULL);
8195 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008197
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008198 /*
8199 * try to handle in this routine the most common case where no
8200 * allocation of a new string is required and where content is
8201 * pure ASCII.
8202 */
8203 limit = *in++;
8204 end = ctxt->input->end;
8205 start = in;
8206 if (in >= end) {
8207 const xmlChar *oldbase = ctxt->input->base;
8208 GROW;
8209 if (oldbase != ctxt->input->base) {
8210 long delta = ctxt->input->base - oldbase;
8211 start = start + delta;
8212 in = in + delta;
8213 }
8214 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008215 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008216 if (normalize) {
8217 /*
8218 * Skip any leading spaces
8219 */
8220 while ((in < end) && (*in != limit) &&
8221 ((*in == 0x20) || (*in == 0x9) ||
8222 (*in == 0xA) || (*in == 0xD))) {
8223 in++;
8224 start = in;
8225 if (in >= end) {
8226 const xmlChar *oldbase = ctxt->input->base;
8227 GROW;
8228 if (oldbase != ctxt->input->base) {
8229 long delta = ctxt->input->base - oldbase;
8230 start = start + delta;
8231 in = in + delta;
8232 }
8233 end = ctxt->input->end;
8234 }
8235 }
8236 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8237 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8238 if ((*in++ == 0x20) && (*in == 0x20)) break;
8239 if (in >= end) {
8240 const xmlChar *oldbase = ctxt->input->base;
8241 GROW;
8242 if (oldbase != ctxt->input->base) {
8243 long delta = ctxt->input->base - oldbase;
8244 start = start + delta;
8245 in = in + delta;
8246 }
8247 end = ctxt->input->end;
8248 }
8249 }
8250 last = in;
8251 /*
8252 * skip the trailing blanks
8253 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008254 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008255 while ((in < end) && (*in != limit) &&
8256 ((*in == 0x20) || (*in == 0x9) ||
8257 (*in == 0xA) || (*in == 0xD))) {
8258 in++;
8259 if (in >= end) {
8260 const xmlChar *oldbase = ctxt->input->base;
8261 GROW;
8262 if (oldbase != ctxt->input->base) {
8263 long delta = ctxt->input->base - oldbase;
8264 start = start + delta;
8265 in = in + delta;
8266 last = last + delta;
8267 }
8268 end = ctxt->input->end;
8269 }
8270 }
8271 if (*in != limit) goto need_complex;
8272 } else {
8273 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8274 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8275 in++;
8276 if (in >= end) {
8277 const xmlChar *oldbase = ctxt->input->base;
8278 GROW;
8279 if (oldbase != ctxt->input->base) {
8280 long delta = ctxt->input->base - oldbase;
8281 start = start + delta;
8282 in = in + delta;
8283 }
8284 end = ctxt->input->end;
8285 }
8286 }
8287 last = in;
8288 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008289 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008290 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008291 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008292 *len = last - start;
8293 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008294 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008295 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008296 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008297 }
8298 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008299 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008300 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008301need_complex:
8302 if (alloc) *alloc = 1;
8303 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008304}
8305
8306/**
8307 * xmlParseAttribute2:
8308 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008309 * @pref: the element prefix
8310 * @elem: the element name
8311 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008312 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008313 * @len: an int * to save the length of the attribute
8314 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008315 *
8316 * parse an attribute in the new SAX2 framework.
8317 *
8318 * Returns the attribute name, and the value in *value, .
8319 */
8320
8321static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008322xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008323 const xmlChar * pref, const xmlChar * elem,
8324 const xmlChar ** prefix, xmlChar ** value,
8325 int *len, int *alloc)
8326{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008327 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008328 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008329 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008330
8331 *value = NULL;
8332 GROW;
8333 name = xmlParseQName(ctxt, prefix);
8334 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008335 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8336 "error parsing attribute name\n");
8337 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008338 }
8339
8340 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008341 * get the type if needed
8342 */
8343 if (ctxt->attsSpecial != NULL) {
8344 int type;
8345
8346 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008347 pref, elem, *prefix, name);
8348 if (type != 0)
8349 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008350 }
8351
8352 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008353 * read the value
8354 */
8355 SKIP_BLANKS;
8356 if (RAW == '=') {
8357 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008358 SKIP_BLANKS;
8359 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8360 if (normalize) {
8361 /*
8362 * Sometimes a second normalisation pass for spaces is needed
8363 * but that only happens if charrefs or entities refernces
8364 * have been used in the attribute value, i.e. the attribute
8365 * value have been extracted in an allocated string already.
8366 */
8367 if (*alloc) {
8368 const xmlChar *val2;
8369
8370 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008371 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008372 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008373 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008374 }
8375 }
8376 }
8377 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008378 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008379 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8380 "Specification mandate value for attribute %s\n",
8381 name);
8382 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008383 }
8384
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008385 if (*prefix == ctxt->str_xml) {
8386 /*
8387 * Check that xml:lang conforms to the specification
8388 * No more registered as an error, just generate a warning now
8389 * since this was deprecated in XML second edition
8390 */
8391 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8392 internal_val = xmlStrndup(val, *len);
8393 if (!xmlCheckLanguageID(internal_val)) {
8394 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8395 "Malformed value for xml:lang : %s\n",
8396 internal_val, NULL);
8397 }
8398 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008399
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008400 /*
8401 * Check that xml:space conforms to the specification
8402 */
8403 if (xmlStrEqual(name, BAD_CAST "space")) {
8404 internal_val = xmlStrndup(val, *len);
8405 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8406 *(ctxt->space) = 0;
8407 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8408 *(ctxt->space) = 1;
8409 else {
8410 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8411 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8412 internal_val, NULL);
8413 }
8414 }
8415 if (internal_val) {
8416 xmlFree(internal_val);
8417 }
8418 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008419
8420 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008421 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008422}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008423/**
8424 * xmlParseStartTag2:
8425 * @ctxt: an XML parser context
8426 *
8427 * parse a start of tag either for rule element or
8428 * EmptyElement. In both case we don't parse the tag closing chars.
8429 * This routine is called when running SAX2 parsing
8430 *
8431 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8432 *
8433 * [ WFC: Unique Att Spec ]
8434 * No attribute name may appear more than once in the same start-tag or
8435 * empty-element tag.
8436 *
8437 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8438 *
8439 * [ WFC: Unique Att Spec ]
8440 * No attribute name may appear more than once in the same start-tag or
8441 * empty-element tag.
8442 *
8443 * With namespace:
8444 *
8445 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8446 *
8447 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8448 *
8449 * Returns the element name parsed
8450 */
8451
8452static const xmlChar *
8453xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008454 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008455 const xmlChar *localname;
8456 const xmlChar *prefix;
8457 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008458 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 const xmlChar *nsname;
8460 xmlChar *attvalue;
8461 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008462 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008463 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008464 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008465 const xmlChar *base;
8466 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008467 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008468
8469 if (RAW != '<') return(NULL);
8470 NEXT1;
8471
8472 /*
8473 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8474 * point since the attribute values may be stored as pointers to
8475 * the buffer and calling SHRINK would destroy them !
8476 * The Shrinking is only possible once the full set of attribute
8477 * callbacks have been done.
8478 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008479reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008480 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008481 base = ctxt->input->base;
8482 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008483 oldline = ctxt->input->line;
8484 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008485 nbatts = 0;
8486 nratts = 0;
8487 nbdef = 0;
8488 nbNs = 0;
8489 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008490 /* Forget any namespaces added during an earlier parse of this element. */
8491 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008492
8493 localname = xmlParseQName(ctxt, &prefix);
8494 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008495 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8496 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008497 return(NULL);
8498 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008499 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008500
8501 /*
8502 * Now parse the attributes, it ends up with the ending
8503 *
8504 * (S Attribute)* S?
8505 */
8506 SKIP_BLANKS;
8507 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008508 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008509
8510 while ((RAW != '>') &&
8511 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008512 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008513 const xmlChar *q = CUR_PTR;
8514 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008515 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008516
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008517 attname = xmlParseAttribute2(ctxt, prefix, localname,
8518 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008519 if (ctxt->input->base != base) {
8520 if ((attvalue != NULL) && (alloc != 0))
8521 xmlFree(attvalue);
8522 attvalue = NULL;
8523 goto base_changed;
8524 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008525 if ((attname != NULL) && (attvalue != NULL)) {
8526 if (len < 0) len = xmlStrlen(attvalue);
8527 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008528 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8529 xmlURIPtr uri;
8530
8531 if (*URL != 0) {
8532 uri = xmlParseURI((const char *) URL);
8533 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008534 xmlNsErr(ctxt, XML_WAR_NS_URI,
8535 "xmlns: '%s' is not a valid URI\n",
8536 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008537 } else {
Daniel Veillard37334572008-07-31 08:20:02 +00008538 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8539 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8540 "xmlns: URI %s is not absolute\n",
8541 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008542 }
8543 xmlFreeURI(uri);
8544 }
Daniel Veillard37334572008-07-31 08:20:02 +00008545 if (URL == ctxt->str_xml_ns) {
8546 if (attname != ctxt->str_xml) {
8547 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8548 "xml namespace URI cannot be the default namespace\n",
8549 NULL, NULL, NULL);
8550 }
8551 goto skip_default_ns;
8552 }
8553 if ((len == 29) &&
8554 (xmlStrEqual(URL,
8555 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8556 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8557 "reuse of the xmlns namespace name is forbidden\n",
8558 NULL, NULL, NULL);
8559 goto skip_default_ns;
8560 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008561 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008563 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008565 for (j = 1;j <= nbNs;j++)
8566 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8567 break;
8568 if (j <= nbNs)
8569 xmlErrAttributeDup(ctxt, NULL, attname);
8570 else
8571 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008572skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008573 if (alloc != 0) xmlFree(attvalue);
8574 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008575 continue;
8576 }
8577 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008578 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8579 xmlURIPtr uri;
8580
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008581 if (attname == ctxt->str_xml) {
8582 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008583 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8584 "xml namespace prefix mapped to wrong URI\n",
8585 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008586 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008587 /*
8588 * Do not keep a namespace definition node
8589 */
Daniel Veillard37334572008-07-31 08:20:02 +00008590 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008591 }
Daniel Veillard37334572008-07-31 08:20:02 +00008592 if (URL == ctxt->str_xml_ns) {
8593 if (attname != ctxt->str_xml) {
8594 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8595 "xml namespace URI mapped to wrong prefix\n",
8596 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008597 }
Daniel Veillard37334572008-07-31 08:20:02 +00008598 goto skip_ns;
8599 }
8600 if (attname == ctxt->str_xmlns) {
8601 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8602 "redefinition of the xmlns prefix is forbidden\n",
8603 NULL, NULL, NULL);
8604 goto skip_ns;
8605 }
8606 if ((len == 29) &&
8607 (xmlStrEqual(URL,
8608 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8609 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8610 "reuse of the xmlns namespace name is forbidden\n",
8611 NULL, NULL, NULL);
8612 goto skip_ns;
8613 }
8614 if ((URL == NULL) || (URL[0] == 0)) {
8615 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8616 "xmlns:%s: Empty XML namespace is not allowed\n",
8617 attname, NULL, NULL);
8618 goto skip_ns;
8619 } else {
8620 uri = xmlParseURI((const char *) URL);
8621 if (uri == NULL) {
8622 xmlNsErr(ctxt, XML_WAR_NS_URI,
8623 "xmlns:%s: '%s' is not a valid URI\n",
8624 attname, URL, NULL);
8625 } else {
8626 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8627 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8628 "xmlns:%s: URI %s is not absolute\n",
8629 attname, URL, NULL);
8630 }
8631 xmlFreeURI(uri);
8632 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008633 }
8634
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008636 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008638 for (j = 1;j <= nbNs;j++)
8639 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8640 break;
8641 if (j <= nbNs)
8642 xmlErrAttributeDup(ctxt, aprefix, attname);
8643 else
8644 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008645skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008646 if (alloc != 0) xmlFree(attvalue);
8647 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008648 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 continue;
8650 }
8651
8652 /*
8653 * Add the pair to atts
8654 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008655 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8656 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008657 if (attvalue[len] == 0)
8658 xmlFree(attvalue);
8659 goto failed;
8660 }
8661 maxatts = ctxt->maxatts;
8662 atts = ctxt->atts;
8663 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008664 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008665 atts[nbatts++] = attname;
8666 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008667 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008668 atts[nbatts++] = attvalue;
8669 attvalue += len;
8670 atts[nbatts++] = attvalue;
8671 /*
8672 * tag if some deallocation is needed
8673 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008674 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008675 } else {
8676 if ((attvalue != NULL) && (attvalue[len] == 0))
8677 xmlFree(attvalue);
8678 }
8679
Daniel Veillard37334572008-07-31 08:20:02 +00008680failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008681
8682 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008683 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008684 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8685 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008686 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8688 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008689 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008690 }
8691 SKIP_BLANKS;
8692 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8693 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008694 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008695 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008696 break;
8697 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008698 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008699 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008700 }
8701
Daniel Veillard0fb18932003-09-07 09:14:37 +00008702 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008703 * The attributes defaulting
8704 */
8705 if (ctxt->attsDefault != NULL) {
8706 xmlDefAttrsPtr defaults;
8707
8708 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8709 if (defaults != NULL) {
8710 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008711 attname = defaults->values[5 * i];
8712 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008713
8714 /*
8715 * special work for namespaces defaulted defs
8716 */
8717 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8718 /*
8719 * check that it's not a defined namespace
8720 */
8721 for (j = 1;j <= nbNs;j++)
8722 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8723 break;
8724 if (j <= nbNs) continue;
8725
8726 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008727 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008728 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008729 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008730 nbNs++;
8731 }
8732 } else if (aprefix == ctxt->str_xmlns) {
8733 /*
8734 * check that it's not a defined namespace
8735 */
8736 for (j = 1;j <= nbNs;j++)
8737 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8738 break;
8739 if (j <= nbNs) continue;
8740
8741 nsname = xmlGetNamespace(ctxt, attname);
8742 if (nsname != defaults->values[2]) {
8743 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008744 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008745 nbNs++;
8746 }
8747 } else {
8748 /*
8749 * check that it's not a defined attribute
8750 */
8751 for (j = 0;j < nbatts;j+=5) {
8752 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8753 break;
8754 }
8755 if (j < nbatts) continue;
8756
8757 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8758 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008759 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008760 }
8761 maxatts = ctxt->maxatts;
8762 atts = ctxt->atts;
8763 }
8764 atts[nbatts++] = attname;
8765 atts[nbatts++] = aprefix;
8766 if (aprefix == NULL)
8767 atts[nbatts++] = NULL;
8768 else
8769 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008770 atts[nbatts++] = defaults->values[5 * i + 2];
8771 atts[nbatts++] = defaults->values[5 * i + 3];
8772 if ((ctxt->standalone == 1) &&
8773 (defaults->values[5 * i + 4] != NULL)) {
8774 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8775 "standalone: attribute %s on %s defaulted from external subset\n",
8776 attname, localname);
8777 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008778 nbdef++;
8779 }
8780 }
8781 }
8782 }
8783
Daniel Veillarde70c8772003-11-25 07:21:18 +00008784 /*
8785 * The attributes checkings
8786 */
8787 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008788 /*
8789 * The default namespace does not apply to attribute names.
8790 */
8791 if (atts[i + 1] != NULL) {
8792 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8793 if (nsname == NULL) {
8794 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8795 "Namespace prefix %s for %s on %s is not defined\n",
8796 atts[i + 1], atts[i], localname);
8797 }
8798 atts[i + 2] = nsname;
8799 } else
8800 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008801 /*
8802 * [ WFC: Unique Att Spec ]
8803 * No attribute name may appear more than once in the same
8804 * start-tag or empty-element tag.
8805 * As extended by the Namespace in XML REC.
8806 */
8807 for (j = 0; j < i;j += 5) {
8808 if (atts[i] == atts[j]) {
8809 if (atts[i+1] == atts[j+1]) {
8810 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8811 break;
8812 }
8813 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8814 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8815 "Namespaced Attribute %s in '%s' redefined\n",
8816 atts[i], nsname, NULL);
8817 break;
8818 }
8819 }
8820 }
8821 }
8822
Daniel Veillarde57ec792003-09-10 10:50:59 +00008823 nsname = xmlGetNamespace(ctxt, prefix);
8824 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008825 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8826 "Namespace prefix %s on %s is not defined\n",
8827 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008828 }
8829 *pref = prefix;
8830 *URI = nsname;
8831
8832 /*
8833 * SAX: Start of Element !
8834 */
8835 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8836 (!ctxt->disableSAX)) {
8837 if (nbNs > 0)
8838 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8839 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8840 nbatts / 5, nbdef, atts);
8841 else
8842 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8843 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8844 }
8845
8846 /*
8847 * Free up attribute allocated strings if needed
8848 */
8849 if (attval != 0) {
8850 for (i = 3,j = 0; j < nratts;i += 5,j++)
8851 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8852 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008853 }
8854
8855 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008856
8857base_changed:
8858 /*
8859 * the attribute strings are valid iif the base didn't changed
8860 */
8861 if (attval != 0) {
8862 for (i = 3,j = 0; j < nratts;i += 5,j++)
8863 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8864 xmlFree((xmlChar *) atts[i]);
8865 }
8866 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008867 ctxt->input->line = oldline;
8868 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008869 if (ctxt->wellFormed == 1) {
8870 goto reparse;
8871 }
8872 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008873}
8874
8875/**
8876 * xmlParseEndTag2:
8877 * @ctxt: an XML parser context
8878 * @line: line of the start tag
8879 * @nsNr: number of namespaces on the start tag
8880 *
8881 * parse an end of tag
8882 *
8883 * [42] ETag ::= '</' Name S? '>'
8884 *
8885 * With namespace
8886 *
8887 * [NS 9] ETag ::= '</' QName S? '>'
8888 */
8889
8890static void
8891xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008892 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893 const xmlChar *name;
8894
8895 GROW;
8896 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008897 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008898 return;
8899 }
8900 SKIP(2);
8901
William M. Brack13dfa872004-09-18 04:52:08 +00008902 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008903 if (ctxt->input->cur[tlen] == '>') {
8904 ctxt->input->cur += tlen + 1;
8905 goto done;
8906 }
8907 ctxt->input->cur += tlen;
8908 name = (xmlChar*)1;
8909 } else {
8910 if (prefix == NULL)
8911 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8912 else
8913 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8914 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008915
8916 /*
8917 * We should definitely be at the ending "S? '>'" part
8918 */
8919 GROW;
8920 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008921 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008922 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008923 } else
8924 NEXT1;
8925
8926 /*
8927 * [ WFC: Element Type Match ]
8928 * The Name in an element's end-tag must match the element type in the
8929 * start-tag.
8930 *
8931 */
8932 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008933 if (name == NULL) name = BAD_CAST "unparseable";
8934 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008935 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008936 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008937 }
8938
8939 /*
8940 * SAX: End of Tag
8941 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008942done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008943 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8944 (!ctxt->disableSAX))
8945 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8946
Daniel Veillard0fb18932003-09-07 09:14:37 +00008947 spacePop(ctxt);
8948 if (nsNr != 0)
8949 nsPop(ctxt, nsNr);
8950 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008951}
8952
8953/**
Owen Taylor3473f882001-02-23 17:55:21 +00008954 * xmlParseCDSect:
8955 * @ctxt: an XML parser context
8956 *
8957 * Parse escaped pure raw content.
8958 *
8959 * [18] CDSect ::= CDStart CData CDEnd
8960 *
8961 * [19] CDStart ::= '<![CDATA['
8962 *
8963 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8964 *
8965 * [21] CDEnd ::= ']]>'
8966 */
8967void
8968xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8969 xmlChar *buf = NULL;
8970 int len = 0;
8971 int size = XML_PARSER_BUFFER_SIZE;
8972 int r, rl;
8973 int s, sl;
8974 int cur, l;
8975 int count = 0;
8976
Daniel Veillard8f597c32003-10-06 08:19:27 +00008977 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008978 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008979 SKIP(9);
8980 } else
8981 return;
8982
8983 ctxt->instate = XML_PARSER_CDATA_SECTION;
8984 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008985 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008986 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008987 ctxt->instate = XML_PARSER_CONTENT;
8988 return;
8989 }
8990 NEXTL(rl);
8991 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008992 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008993 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008994 ctxt->instate = XML_PARSER_CONTENT;
8995 return;
8996 }
8997 NEXTL(sl);
8998 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008999 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009000 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009001 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009002 return;
9003 }
William M. Brack871611b2003-10-18 04:53:14 +00009004 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009005 ((r != ']') || (s != ']') || (cur != '>'))) {
9006 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009007 xmlChar *tmp;
9008
Owen Taylor3473f882001-02-23 17:55:21 +00009009 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009010 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9011 if (tmp == NULL) {
9012 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009013 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009014 return;
9015 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009016 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009017 }
9018 COPY_BUF(rl,buf,len,r);
9019 r = s;
9020 rl = sl;
9021 s = cur;
9022 sl = l;
9023 count++;
9024 if (count > 50) {
9025 GROW;
9026 count = 0;
9027 }
9028 NEXTL(l);
9029 cur = CUR_CHAR(l);
9030 }
9031 buf[len] = 0;
9032 ctxt->instate = XML_PARSER_CONTENT;
9033 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009034 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009035 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009036 xmlFree(buf);
9037 return;
9038 }
9039 NEXTL(l);
9040
9041 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009042 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009043 */
9044 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9045 if (ctxt->sax->cdataBlock != NULL)
9046 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009047 else if (ctxt->sax->characters != NULL)
9048 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009049 }
9050 xmlFree(buf);
9051}
9052
9053/**
9054 * xmlParseContent:
9055 * @ctxt: an XML parser context
9056 *
9057 * Parse a content:
9058 *
9059 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9060 */
9061
9062void
9063xmlParseContent(xmlParserCtxtPtr ctxt) {
9064 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009065 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009066 ((RAW != '<') || (NXT(1) != '/')) &&
9067 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009068 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009069 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009070 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009071
9072 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009073 * First case : a Processing Instruction.
9074 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009075 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009076 xmlParsePI(ctxt);
9077 }
9078
9079 /*
9080 * Second case : a CDSection
9081 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009082 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009083 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009084 xmlParseCDSect(ctxt);
9085 }
9086
9087 /*
9088 * Third case : a comment
9089 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009090 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009091 (NXT(2) == '-') && (NXT(3) == '-')) {
9092 xmlParseComment(ctxt);
9093 ctxt->instate = XML_PARSER_CONTENT;
9094 }
9095
9096 /*
9097 * Fourth case : a sub-element.
9098 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009099 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009100 xmlParseElement(ctxt);
9101 }
9102
9103 /*
9104 * Fifth case : a reference. If if has not been resolved,
9105 * parsing returns it's Name, create the node
9106 */
9107
Daniel Veillard21a0f912001-02-25 19:54:14 +00009108 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009109 xmlParseReference(ctxt);
9110 }
9111
9112 /*
9113 * Last case, text. Note that References are handled directly.
9114 */
9115 else {
9116 xmlParseCharData(ctxt, 0);
9117 }
9118
9119 GROW;
9120 /*
9121 * Pop-up of finished entities.
9122 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009123 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009124 xmlPopInput(ctxt);
9125 SHRINK;
9126
Daniel Veillardfdc91562002-07-01 21:52:03 +00009127 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009128 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9129 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009130 ctxt->instate = XML_PARSER_EOF;
9131 break;
9132 }
9133 }
9134}
9135
9136/**
9137 * xmlParseElement:
9138 * @ctxt: an XML parser context
9139 *
9140 * parse an XML element, this is highly recursive
9141 *
9142 * [39] element ::= EmptyElemTag | STag content ETag
9143 *
9144 * [ WFC: Element Type Match ]
9145 * The Name in an element's end-tag must match the element type in the
9146 * start-tag.
9147 *
Owen Taylor3473f882001-02-23 17:55:21 +00009148 */
9149
9150void
9151xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009152 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009153 const xmlChar *prefix;
9154 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009155 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009156 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009157 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009158 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009159
Daniel Veillard8915c152008-08-26 13:05:34 +00009160 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9161 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9162 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9163 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9164 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009165 ctxt->instate = XML_PARSER_EOF;
9166 return;
9167 }
9168
Owen Taylor3473f882001-02-23 17:55:21 +00009169 /* Capture start position */
9170 if (ctxt->record_info) {
9171 node_info.begin_pos = ctxt->input->consumed +
9172 (CUR_PTR - ctxt->input->base);
9173 node_info.begin_line = ctxt->input->line;
9174 }
9175
9176 if (ctxt->spaceNr == 0)
9177 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009178 else if (*ctxt->space == -2)
9179 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009180 else
9181 spacePush(ctxt, *ctxt->space);
9182
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009183 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009184#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009185 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009186#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009187 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009188#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009189 else
9190 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009191#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009192 if (name == NULL) {
9193 spacePop(ctxt);
9194 return;
9195 }
9196 namePush(ctxt, name);
9197 ret = ctxt->node;
9198
Daniel Veillard4432df22003-09-28 18:58:27 +00009199#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009200 /*
9201 * [ VC: Root Element Type ]
9202 * The Name in the document type declaration must match the element
9203 * type of the root element.
9204 */
9205 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9206 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9207 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009208#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009209
9210 /*
9211 * Check for an Empty Element.
9212 */
9213 if ((RAW == '/') && (NXT(1) == '>')) {
9214 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215 if (ctxt->sax2) {
9216 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9217 (!ctxt->disableSAX))
9218 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009219#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220 } else {
9221 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9222 (!ctxt->disableSAX))
9223 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009224#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009225 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226 namePop(ctxt);
9227 spacePop(ctxt);
9228 if (nsNr != ctxt->nsNr)
9229 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009230 if ( ret != NULL && ctxt->record_info ) {
9231 node_info.end_pos = ctxt->input->consumed +
9232 (CUR_PTR - ctxt->input->base);
9233 node_info.end_line = ctxt->input->line;
9234 node_info.node = ret;
9235 xmlParserAddNodeInfo(ctxt, &node_info);
9236 }
9237 return;
9238 }
9239 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009240 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009241 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009242 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9243 "Couldn't find end of Start Tag %s line %d\n",
9244 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009245
9246 /*
9247 * end of parsing of this node.
9248 */
9249 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009250 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009251 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009252 if (nsNr != ctxt->nsNr)
9253 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009254
9255 /*
9256 * Capture end position and add node
9257 */
9258 if ( ret != NULL && ctxt->record_info ) {
9259 node_info.end_pos = ctxt->input->consumed +
9260 (CUR_PTR - ctxt->input->base);
9261 node_info.end_line = ctxt->input->line;
9262 node_info.node = ret;
9263 xmlParserAddNodeInfo(ctxt, &node_info);
9264 }
9265 return;
9266 }
9267
9268 /*
9269 * Parse the content of the element:
9270 */
9271 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009272 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009273 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009274 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009275 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009276
9277 /*
9278 * end of parsing of this node.
9279 */
9280 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009281 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009282 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009283 if (nsNr != ctxt->nsNr)
9284 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009285 return;
9286 }
9287
9288 /*
9289 * parse the end of tag: '</' should be here.
9290 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009291 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009292 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009293 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009294 }
9295#ifdef LIBXML_SAX1_ENABLED
9296 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009297 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009298#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009299
9300 /*
9301 * Capture end position and add node
9302 */
9303 if ( ret != NULL && ctxt->record_info ) {
9304 node_info.end_pos = ctxt->input->consumed +
9305 (CUR_PTR - ctxt->input->base);
9306 node_info.end_line = ctxt->input->line;
9307 node_info.node = ret;
9308 xmlParserAddNodeInfo(ctxt, &node_info);
9309 }
9310}
9311
9312/**
9313 * xmlParseVersionNum:
9314 * @ctxt: an XML parser context
9315 *
9316 * parse the XML version value.
9317 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009318 * [26] VersionNum ::= '1.' [0-9]+
9319 *
9320 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009321 *
9322 * Returns the string giving the XML version number, or NULL
9323 */
9324xmlChar *
9325xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9326 xmlChar *buf = NULL;
9327 int len = 0;
9328 int size = 10;
9329 xmlChar cur;
9330
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009331 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009332 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009333 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009334 return(NULL);
9335 }
9336 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009337 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009338 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009339 return(NULL);
9340 }
9341 buf[len++] = cur;
9342 NEXT;
9343 cur=CUR;
9344 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009345 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009346 return(NULL);
9347 }
9348 buf[len++] = cur;
9349 NEXT;
9350 cur=CUR;
9351 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009352 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009353 xmlChar *tmp;
9354
Owen Taylor3473f882001-02-23 17:55:21 +00009355 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009356 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9357 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009358 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009359 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009360 return(NULL);
9361 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009362 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009363 }
9364 buf[len++] = cur;
9365 NEXT;
9366 cur=CUR;
9367 }
9368 buf[len] = 0;
9369 return(buf);
9370}
9371
9372/**
9373 * xmlParseVersionInfo:
9374 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009375 *
Owen Taylor3473f882001-02-23 17:55:21 +00009376 * parse the XML version.
9377 *
9378 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009379 *
Owen Taylor3473f882001-02-23 17:55:21 +00009380 * [25] Eq ::= S? '=' S?
9381 *
9382 * Returns the version string, e.g. "1.0"
9383 */
9384
9385xmlChar *
9386xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9387 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009388
Daniel Veillarda07050d2003-10-19 14:46:32 +00009389 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009390 SKIP(7);
9391 SKIP_BLANKS;
9392 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009393 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009394 return(NULL);
9395 }
9396 NEXT;
9397 SKIP_BLANKS;
9398 if (RAW == '"') {
9399 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009400 version = xmlParseVersionNum(ctxt);
9401 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009402 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009403 } else
9404 NEXT;
9405 } else if (RAW == '\''){
9406 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009407 version = xmlParseVersionNum(ctxt);
9408 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009409 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009410 } else
9411 NEXT;
9412 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009413 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009414 }
9415 }
9416 return(version);
9417}
9418
9419/**
9420 * xmlParseEncName:
9421 * @ctxt: an XML parser context
9422 *
9423 * parse the XML encoding name
9424 *
9425 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9426 *
9427 * Returns the encoding name value or NULL
9428 */
9429xmlChar *
9430xmlParseEncName(xmlParserCtxtPtr ctxt) {
9431 xmlChar *buf = NULL;
9432 int len = 0;
9433 int size = 10;
9434 xmlChar cur;
9435
9436 cur = CUR;
9437 if (((cur >= 'a') && (cur <= 'z')) ||
9438 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009439 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009440 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009441 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009442 return(NULL);
9443 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009444
Owen Taylor3473f882001-02-23 17:55:21 +00009445 buf[len++] = cur;
9446 NEXT;
9447 cur = CUR;
9448 while (((cur >= 'a') && (cur <= 'z')) ||
9449 ((cur >= 'A') && (cur <= 'Z')) ||
9450 ((cur >= '0') && (cur <= '9')) ||
9451 (cur == '.') || (cur == '_') ||
9452 (cur == '-')) {
9453 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009454 xmlChar *tmp;
9455
Owen Taylor3473f882001-02-23 17:55:21 +00009456 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009457 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9458 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009459 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009460 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009461 return(NULL);
9462 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009463 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009464 }
9465 buf[len++] = cur;
9466 NEXT;
9467 cur = CUR;
9468 if (cur == 0) {
9469 SHRINK;
9470 GROW;
9471 cur = CUR;
9472 }
9473 }
9474 buf[len] = 0;
9475 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009476 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009477 }
9478 return(buf);
9479}
9480
9481/**
9482 * xmlParseEncodingDecl:
9483 * @ctxt: an XML parser context
9484 *
9485 * parse the XML encoding declaration
9486 *
9487 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9488 *
9489 * this setups the conversion filters.
9490 *
9491 * Returns the encoding value or NULL
9492 */
9493
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009494const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009495xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9496 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009497
9498 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009499 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009500 SKIP(8);
9501 SKIP_BLANKS;
9502 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009503 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009504 return(NULL);
9505 }
9506 NEXT;
9507 SKIP_BLANKS;
9508 if (RAW == '"') {
9509 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009510 encoding = xmlParseEncName(ctxt);
9511 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009512 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009513 } else
9514 NEXT;
9515 } else if (RAW == '\''){
9516 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009517 encoding = xmlParseEncName(ctxt);
9518 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009519 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009520 } else
9521 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009522 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009524 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009525 /*
9526 * UTF-16 encoding stwich has already taken place at this stage,
9527 * more over the little-endian/big-endian selection is already done
9528 */
9529 if ((encoding != NULL) &&
9530 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9531 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009532 /*
9533 * If no encoding was passed to the parser, that we are
9534 * using UTF-16 and no decoder is present i.e. the
9535 * document is apparently UTF-8 compatible, then raise an
9536 * encoding mismatch fatal error
9537 */
9538 if ((ctxt->encoding == NULL) &&
9539 (ctxt->input->buf != NULL) &&
9540 (ctxt->input->buf->encoder == NULL)) {
9541 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9542 "Document labelled UTF-16 but has UTF-8 content\n");
9543 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009544 if (ctxt->encoding != NULL)
9545 xmlFree((xmlChar *) ctxt->encoding);
9546 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009547 }
9548 /*
9549 * UTF-8 encoding is handled natively
9550 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009551 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009552 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9553 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009554 if (ctxt->encoding != NULL)
9555 xmlFree((xmlChar *) ctxt->encoding);
9556 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009557 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009558 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009559 xmlCharEncodingHandlerPtr handler;
9560
9561 if (ctxt->input->encoding != NULL)
9562 xmlFree((xmlChar *) ctxt->input->encoding);
9563 ctxt->input->encoding = encoding;
9564
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009565 handler = xmlFindCharEncodingHandler((const char *) encoding);
9566 if (handler != NULL) {
9567 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009568 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009569 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009570 "Unsupported encoding %s\n", encoding);
9571 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009572 }
9573 }
9574 }
9575 return(encoding);
9576}
9577
9578/**
9579 * xmlParseSDDecl:
9580 * @ctxt: an XML parser context
9581 *
9582 * parse the XML standalone declaration
9583 *
9584 * [32] SDDecl ::= S 'standalone' Eq
9585 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9586 *
9587 * [ VC: Standalone Document Declaration ]
9588 * TODO The standalone document declaration must have the value "no"
9589 * if any external markup declarations contain declarations of:
9590 * - attributes with default values, if elements to which these
9591 * attributes apply appear in the document without specifications
9592 * of values for these attributes, or
9593 * - entities (other than amp, lt, gt, apos, quot), if references
9594 * to those entities appear in the document, or
9595 * - attributes with values subject to normalization, where the
9596 * attribute appears in the document with a value which will change
9597 * as a result of normalization, or
9598 * - element types with element content, if white space occurs directly
9599 * within any instance of those types.
9600 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009601 * Returns:
9602 * 1 if standalone="yes"
9603 * 0 if standalone="no"
9604 * -2 if standalone attribute is missing or invalid
9605 * (A standalone value of -2 means that the XML declaration was found,
9606 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009607 */
9608
9609int
9610xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009611 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009612
9613 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009614 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009615 SKIP(10);
9616 SKIP_BLANKS;
9617 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009618 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009619 return(standalone);
9620 }
9621 NEXT;
9622 SKIP_BLANKS;
9623 if (RAW == '\''){
9624 NEXT;
9625 if ((RAW == 'n') && (NXT(1) == 'o')) {
9626 standalone = 0;
9627 SKIP(2);
9628 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9629 (NXT(2) == 's')) {
9630 standalone = 1;
9631 SKIP(3);
9632 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009633 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009634 }
9635 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009636 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009637 } else
9638 NEXT;
9639 } else if (RAW == '"'){
9640 NEXT;
9641 if ((RAW == 'n') && (NXT(1) == 'o')) {
9642 standalone = 0;
9643 SKIP(2);
9644 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9645 (NXT(2) == 's')) {
9646 standalone = 1;
9647 SKIP(3);
9648 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009649 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009650 }
9651 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009652 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009653 } else
9654 NEXT;
9655 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009656 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009657 }
9658 }
9659 return(standalone);
9660}
9661
9662/**
9663 * xmlParseXMLDecl:
9664 * @ctxt: an XML parser context
9665 *
9666 * parse an XML declaration header
9667 *
9668 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9669 */
9670
9671void
9672xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9673 xmlChar *version;
9674
9675 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009676 * This value for standalone indicates that the document has an
9677 * XML declaration but it does not have a standalone attribute.
9678 * It will be overwritten later if a standalone attribute is found.
9679 */
9680 ctxt->input->standalone = -2;
9681
9682 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009683 * We know that '<?xml' is here.
9684 */
9685 SKIP(5);
9686
William M. Brack76e95df2003-10-18 16:20:14 +00009687 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9689 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009690 }
9691 SKIP_BLANKS;
9692
9693 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009694 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009695 */
9696 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009697 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009698 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009699 } else {
9700 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9701 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009702 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009703 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009704 if (ctxt->options & XML_PARSE_OLD10) {
9705 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9706 "Unsupported version '%s'\n",
9707 version);
9708 } else {
9709 if ((version[0] == '1') && ((version[1] == '.'))) {
9710 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9711 "Unsupported version '%s'\n",
9712 version, NULL);
9713 } else {
9714 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9715 "Unsupported version '%s'\n",
9716 version);
9717 }
9718 }
Daniel Veillard19840942001-11-29 16:11:38 +00009719 }
9720 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009721 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009722 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009723 }
Owen Taylor3473f882001-02-23 17:55:21 +00009724
9725 /*
9726 * We may have the encoding declaration
9727 */
William M. Brack76e95df2003-10-18 16:20:14 +00009728 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009729 if ((RAW == '?') && (NXT(1) == '>')) {
9730 SKIP(2);
9731 return;
9732 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009733 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009734 }
9735 xmlParseEncodingDecl(ctxt);
9736 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9737 /*
9738 * The XML REC instructs us to stop parsing right here
9739 */
9740 return;
9741 }
9742
9743 /*
9744 * We may have the standalone status.
9745 */
William M. Brack76e95df2003-10-18 16:20:14 +00009746 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009747 if ((RAW == '?') && (NXT(1) == '>')) {
9748 SKIP(2);
9749 return;
9750 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009751 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009752 }
9753 SKIP_BLANKS;
9754 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9755
9756 SKIP_BLANKS;
9757 if ((RAW == '?') && (NXT(1) == '>')) {
9758 SKIP(2);
9759 } else if (RAW == '>') {
9760 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009761 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009762 NEXT;
9763 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009764 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009765 MOVETO_ENDTAG(CUR_PTR);
9766 NEXT;
9767 }
9768}
9769
9770/**
9771 * xmlParseMisc:
9772 * @ctxt: an XML parser context
9773 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009774 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009775 *
9776 * [27] Misc ::= Comment | PI | S
9777 */
9778
9779void
9780xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009781 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009782 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009783 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009784 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009785 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009786 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009787 NEXT;
9788 } else
9789 xmlParseComment(ctxt);
9790 }
9791}
9792
9793/**
9794 * xmlParseDocument:
9795 * @ctxt: an XML parser context
9796 *
9797 * parse an XML document (and build a tree if using the standard SAX
9798 * interface).
9799 *
9800 * [1] document ::= prolog element Misc*
9801 *
9802 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9803 *
9804 * Returns 0, -1 in case of error. the parser context is augmented
9805 * as a result of the parsing.
9806 */
9807
9808int
9809xmlParseDocument(xmlParserCtxtPtr ctxt) {
9810 xmlChar start[4];
9811 xmlCharEncoding enc;
9812
9813 xmlInitParser();
9814
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009815 if ((ctxt == NULL) || (ctxt->input == NULL))
9816 return(-1);
9817
Owen Taylor3473f882001-02-23 17:55:21 +00009818 GROW;
9819
9820 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009821 * SAX: detecting the level.
9822 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009823 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009824
9825 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009826 * SAX: beginning of the document processing.
9827 */
9828 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9829 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9830
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009831 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9832 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009833 /*
9834 * Get the 4 first bytes and decode the charset
9835 * if enc != XML_CHAR_ENCODING_NONE
9836 * plug some encoding conversion routines.
9837 */
9838 start[0] = RAW;
9839 start[1] = NXT(1);
9840 start[2] = NXT(2);
9841 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009842 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009843 if (enc != XML_CHAR_ENCODING_NONE) {
9844 xmlSwitchEncoding(ctxt, enc);
9845 }
Owen Taylor3473f882001-02-23 17:55:21 +00009846 }
9847
9848
9849 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009850 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009851 }
9852
9853 /*
9854 * Check for the XMLDecl in the Prolog.
9855 */
9856 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009857 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009858
9859 /*
9860 * Note that we will switch encoding on the fly.
9861 */
9862 xmlParseXMLDecl(ctxt);
9863 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9864 /*
9865 * The XML REC instructs us to stop parsing right here
9866 */
9867 return(-1);
9868 }
9869 ctxt->standalone = ctxt->input->standalone;
9870 SKIP_BLANKS;
9871 } else {
9872 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9873 }
9874 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9875 ctxt->sax->startDocument(ctxt->userData);
9876
9877 /*
9878 * The Misc part of the Prolog
9879 */
9880 GROW;
9881 xmlParseMisc(ctxt);
9882
9883 /*
9884 * Then possibly doc type declaration(s) and more Misc
9885 * (doctypedecl Misc*)?
9886 */
9887 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009888 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009889
9890 ctxt->inSubset = 1;
9891 xmlParseDocTypeDecl(ctxt);
9892 if (RAW == '[') {
9893 ctxt->instate = XML_PARSER_DTD;
9894 xmlParseInternalSubset(ctxt);
9895 }
9896
9897 /*
9898 * Create and update the external subset.
9899 */
9900 ctxt->inSubset = 2;
9901 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9902 (!ctxt->disableSAX))
9903 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9904 ctxt->extSubSystem, ctxt->extSubURI);
9905 ctxt->inSubset = 0;
9906
Daniel Veillardac4118d2008-01-11 05:27:32 +00009907 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009908
9909 ctxt->instate = XML_PARSER_PROLOG;
9910 xmlParseMisc(ctxt);
9911 }
9912
9913 /*
9914 * Time to start parsing the tree itself
9915 */
9916 GROW;
9917 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009918 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9919 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009920 } else {
9921 ctxt->instate = XML_PARSER_CONTENT;
9922 xmlParseElement(ctxt);
9923 ctxt->instate = XML_PARSER_EPILOG;
9924
9925
9926 /*
9927 * The Misc part at the end
9928 */
9929 xmlParseMisc(ctxt);
9930
Daniel Veillard561b7f82002-03-20 21:55:57 +00009931 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009932 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009933 }
9934 ctxt->instate = XML_PARSER_EOF;
9935 }
9936
9937 /*
9938 * SAX: end of the document processing.
9939 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009940 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009941 ctxt->sax->endDocument(ctxt->userData);
9942
Daniel Veillard5997aca2002-03-18 18:36:20 +00009943 /*
9944 * Remove locally kept entity definitions if the tree was not built
9945 */
9946 if ((ctxt->myDoc != NULL) &&
9947 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9948 xmlFreeDoc(ctxt->myDoc);
9949 ctxt->myDoc = NULL;
9950 }
9951
Daniel Veillardae0765b2008-07-31 19:54:59 +00009952 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
9953 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
9954 if (ctxt->valid)
9955 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
9956 if (ctxt->nsWellFormed)
9957 ctxt->myDoc->properties |= XML_DOC_NSVALID;
9958 if (ctxt->options & XML_PARSE_OLD10)
9959 ctxt->myDoc->properties |= XML_DOC_OLD10;
9960 }
Daniel Veillardc7612992002-02-17 22:47:37 +00009961 if (! ctxt->wellFormed) {
9962 ctxt->valid = 0;
9963 return(-1);
9964 }
Owen Taylor3473f882001-02-23 17:55:21 +00009965 return(0);
9966}
9967
9968/**
9969 * xmlParseExtParsedEnt:
9970 * @ctxt: an XML parser context
9971 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009972 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009973 * An external general parsed entity is well-formed if it matches the
9974 * production labeled extParsedEnt.
9975 *
9976 * [78] extParsedEnt ::= TextDecl? content
9977 *
9978 * Returns 0, -1 in case of error. the parser context is augmented
9979 * as a result of the parsing.
9980 */
9981
9982int
9983xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9984 xmlChar start[4];
9985 xmlCharEncoding enc;
9986
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009987 if ((ctxt == NULL) || (ctxt->input == NULL))
9988 return(-1);
9989
Owen Taylor3473f882001-02-23 17:55:21 +00009990 xmlDefaultSAXHandlerInit();
9991
Daniel Veillard309f81d2003-09-23 09:02:53 +00009992 xmlDetectSAX2(ctxt);
9993
Owen Taylor3473f882001-02-23 17:55:21 +00009994 GROW;
9995
9996 /*
9997 * SAX: beginning of the document processing.
9998 */
9999 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10000 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10001
10002 /*
10003 * Get the 4 first bytes and decode the charset
10004 * if enc != XML_CHAR_ENCODING_NONE
10005 * plug some encoding conversion routines.
10006 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010007 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10008 start[0] = RAW;
10009 start[1] = NXT(1);
10010 start[2] = NXT(2);
10011 start[3] = NXT(3);
10012 enc = xmlDetectCharEncoding(start, 4);
10013 if (enc != XML_CHAR_ENCODING_NONE) {
10014 xmlSwitchEncoding(ctxt, enc);
10015 }
Owen Taylor3473f882001-02-23 17:55:21 +000010016 }
10017
10018
10019 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010020 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010021 }
10022
10023 /*
10024 * Check for the XMLDecl in the Prolog.
10025 */
10026 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010027 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010028
10029 /*
10030 * Note that we will switch encoding on the fly.
10031 */
10032 xmlParseXMLDecl(ctxt);
10033 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10034 /*
10035 * The XML REC instructs us to stop parsing right here
10036 */
10037 return(-1);
10038 }
10039 SKIP_BLANKS;
10040 } else {
10041 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10042 }
10043 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10044 ctxt->sax->startDocument(ctxt->userData);
10045
10046 /*
10047 * Doing validity checking on chunk doesn't make sense
10048 */
10049 ctxt->instate = XML_PARSER_CONTENT;
10050 ctxt->validate = 0;
10051 ctxt->loadsubset = 0;
10052 ctxt->depth = 0;
10053
10054 xmlParseContent(ctxt);
10055
10056 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010057 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010058 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010059 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010060 }
10061
10062 /*
10063 * SAX: end of the document processing.
10064 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010065 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010066 ctxt->sax->endDocument(ctxt->userData);
10067
10068 if (! ctxt->wellFormed) return(-1);
10069 return(0);
10070}
10071
Daniel Veillard73b013f2003-09-30 12:36:01 +000010072#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010073/************************************************************************
10074 * *
10075 * Progressive parsing interfaces *
10076 * *
10077 ************************************************************************/
10078
10079/**
10080 * xmlParseLookupSequence:
10081 * @ctxt: an XML parser context
10082 * @first: the first char to lookup
10083 * @next: the next char to lookup or zero
10084 * @third: the next char to lookup or zero
10085 *
10086 * Try to find if a sequence (first, next, third) or just (first next) or
10087 * (first) is available in the input stream.
10088 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10089 * to avoid rescanning sequences of bytes, it DOES change the state of the
10090 * parser, do not use liberally.
10091 *
10092 * Returns the index to the current parsing point if the full sequence
10093 * is available, -1 otherwise.
10094 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010095static int
Owen Taylor3473f882001-02-23 17:55:21 +000010096xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10097 xmlChar next, xmlChar third) {
10098 int base, len;
10099 xmlParserInputPtr in;
10100 const xmlChar *buf;
10101
10102 in = ctxt->input;
10103 if (in == NULL) return(-1);
10104 base = in->cur - in->base;
10105 if (base < 0) return(-1);
10106 if (ctxt->checkIndex > base)
10107 base = ctxt->checkIndex;
10108 if (in->buf == NULL) {
10109 buf = in->base;
10110 len = in->length;
10111 } else {
10112 buf = in->buf->buffer->content;
10113 len = in->buf->buffer->use;
10114 }
10115 /* take into account the sequence length */
10116 if (third) len -= 2;
10117 else if (next) len --;
10118 for (;base < len;base++) {
10119 if (buf[base] == first) {
10120 if (third != 0) {
10121 if ((buf[base + 1] != next) ||
10122 (buf[base + 2] != third)) continue;
10123 } else if (next != 0) {
10124 if (buf[base + 1] != next) continue;
10125 }
10126 ctxt->checkIndex = 0;
10127#ifdef DEBUG_PUSH
10128 if (next == 0)
10129 xmlGenericError(xmlGenericErrorContext,
10130 "PP: lookup '%c' found at %d\n",
10131 first, base);
10132 else if (third == 0)
10133 xmlGenericError(xmlGenericErrorContext,
10134 "PP: lookup '%c%c' found at %d\n",
10135 first, next, base);
10136 else
10137 xmlGenericError(xmlGenericErrorContext,
10138 "PP: lookup '%c%c%c' found at %d\n",
10139 first, next, third, base);
10140#endif
10141 return(base - (in->cur - in->base));
10142 }
10143 }
10144 ctxt->checkIndex = base;
10145#ifdef DEBUG_PUSH
10146 if (next == 0)
10147 xmlGenericError(xmlGenericErrorContext,
10148 "PP: lookup '%c' failed\n", first);
10149 else if (third == 0)
10150 xmlGenericError(xmlGenericErrorContext,
10151 "PP: lookup '%c%c' failed\n", first, next);
10152 else
10153 xmlGenericError(xmlGenericErrorContext,
10154 "PP: lookup '%c%c%c' failed\n", first, next, third);
10155#endif
10156 return(-1);
10157}
10158
10159/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010160 * xmlParseGetLasts:
10161 * @ctxt: an XML parser context
10162 * @lastlt: pointer to store the last '<' from the input
10163 * @lastgt: pointer to store the last '>' from the input
10164 *
10165 * Lookup the last < and > in the current chunk
10166 */
10167static void
10168xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10169 const xmlChar **lastgt) {
10170 const xmlChar *tmp;
10171
10172 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10173 xmlGenericError(xmlGenericErrorContext,
10174 "Internal error: xmlParseGetLasts\n");
10175 return;
10176 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010177 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010178 tmp = ctxt->input->end;
10179 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010180 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010181 if (tmp < ctxt->input->base) {
10182 *lastlt = NULL;
10183 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010184 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010185 *lastlt = tmp;
10186 tmp++;
10187 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10188 if (*tmp == '\'') {
10189 tmp++;
10190 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10191 if (tmp < ctxt->input->end) tmp++;
10192 } else if (*tmp == '"') {
10193 tmp++;
10194 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10195 if (tmp < ctxt->input->end) tmp++;
10196 } else
10197 tmp++;
10198 }
10199 if (tmp < ctxt->input->end)
10200 *lastgt = tmp;
10201 else {
10202 tmp = *lastlt;
10203 tmp--;
10204 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10205 if (tmp >= ctxt->input->base)
10206 *lastgt = tmp;
10207 else
10208 *lastgt = NULL;
10209 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010210 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010211 } else {
10212 *lastlt = NULL;
10213 *lastgt = NULL;
10214 }
10215}
10216/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010217 * xmlCheckCdataPush:
10218 * @cur: pointer to the bock of characters
10219 * @len: length of the block in bytes
10220 *
10221 * Check that the block of characters is okay as SCdata content [20]
10222 *
10223 * Returns the number of bytes to pass if okay, a negative index where an
10224 * UTF-8 error occured otherwise
10225 */
10226static int
10227xmlCheckCdataPush(const xmlChar *utf, int len) {
10228 int ix;
10229 unsigned char c;
10230 int codepoint;
10231
10232 if ((utf == NULL) || (len <= 0))
10233 return(0);
10234
10235 for (ix = 0; ix < len;) { /* string is 0-terminated */
10236 c = utf[ix];
10237 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10238 if (c >= 0x20)
10239 ix++;
10240 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10241 ix++;
10242 else
10243 return(-ix);
10244 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10245 if (ix + 2 > len) return(ix);
10246 if ((utf[ix+1] & 0xc0 ) != 0x80)
10247 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010248 codepoint = (utf[ix] & 0x1f) << 6;
10249 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010250 if (!xmlIsCharQ(codepoint))
10251 return(-ix);
10252 ix += 2;
10253 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10254 if (ix + 3 > len) return(ix);
10255 if (((utf[ix+1] & 0xc0) != 0x80) ||
10256 ((utf[ix+2] & 0xc0) != 0x80))
10257 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010258 codepoint = (utf[ix] & 0xf) << 12;
10259 codepoint |= (utf[ix+1] & 0x3f) << 6;
10260 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010261 if (!xmlIsCharQ(codepoint))
10262 return(-ix);
10263 ix += 3;
10264 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10265 if (ix + 4 > len) return(ix);
10266 if (((utf[ix+1] & 0xc0) != 0x80) ||
10267 ((utf[ix+2] & 0xc0) != 0x80) ||
10268 ((utf[ix+3] & 0xc0) != 0x80))
10269 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010270 codepoint = (utf[ix] & 0x7) << 18;
10271 codepoint |= (utf[ix+1] & 0x3f) << 12;
10272 codepoint |= (utf[ix+2] & 0x3f) << 6;
10273 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010274 if (!xmlIsCharQ(codepoint))
10275 return(-ix);
10276 ix += 4;
10277 } else /* unknown encoding */
10278 return(-ix);
10279 }
10280 return(ix);
10281}
10282
10283/**
Owen Taylor3473f882001-02-23 17:55:21 +000010284 * xmlParseTryOrFinish:
10285 * @ctxt: an XML parser context
10286 * @terminate: last chunk indicator
10287 *
10288 * Try to progress on parsing
10289 *
10290 * Returns zero if no parsing was possible
10291 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010292static int
Owen Taylor3473f882001-02-23 17:55:21 +000010293xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10294 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010295 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010296 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010297 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010298
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010299 if (ctxt->input == NULL)
10300 return(0);
10301
Owen Taylor3473f882001-02-23 17:55:21 +000010302#ifdef DEBUG_PUSH
10303 switch (ctxt->instate) {
10304 case XML_PARSER_EOF:
10305 xmlGenericError(xmlGenericErrorContext,
10306 "PP: try EOF\n"); break;
10307 case XML_PARSER_START:
10308 xmlGenericError(xmlGenericErrorContext,
10309 "PP: try START\n"); break;
10310 case XML_PARSER_MISC:
10311 xmlGenericError(xmlGenericErrorContext,
10312 "PP: try MISC\n");break;
10313 case XML_PARSER_COMMENT:
10314 xmlGenericError(xmlGenericErrorContext,
10315 "PP: try COMMENT\n");break;
10316 case XML_PARSER_PROLOG:
10317 xmlGenericError(xmlGenericErrorContext,
10318 "PP: try PROLOG\n");break;
10319 case XML_PARSER_START_TAG:
10320 xmlGenericError(xmlGenericErrorContext,
10321 "PP: try START_TAG\n");break;
10322 case XML_PARSER_CONTENT:
10323 xmlGenericError(xmlGenericErrorContext,
10324 "PP: try CONTENT\n");break;
10325 case XML_PARSER_CDATA_SECTION:
10326 xmlGenericError(xmlGenericErrorContext,
10327 "PP: try CDATA_SECTION\n");break;
10328 case XML_PARSER_END_TAG:
10329 xmlGenericError(xmlGenericErrorContext,
10330 "PP: try END_TAG\n");break;
10331 case XML_PARSER_ENTITY_DECL:
10332 xmlGenericError(xmlGenericErrorContext,
10333 "PP: try ENTITY_DECL\n");break;
10334 case XML_PARSER_ENTITY_VALUE:
10335 xmlGenericError(xmlGenericErrorContext,
10336 "PP: try ENTITY_VALUE\n");break;
10337 case XML_PARSER_ATTRIBUTE_VALUE:
10338 xmlGenericError(xmlGenericErrorContext,
10339 "PP: try ATTRIBUTE_VALUE\n");break;
10340 case XML_PARSER_DTD:
10341 xmlGenericError(xmlGenericErrorContext,
10342 "PP: try DTD\n");break;
10343 case XML_PARSER_EPILOG:
10344 xmlGenericError(xmlGenericErrorContext,
10345 "PP: try EPILOG\n");break;
10346 case XML_PARSER_PI:
10347 xmlGenericError(xmlGenericErrorContext,
10348 "PP: try PI\n");break;
10349 case XML_PARSER_IGNORE:
10350 xmlGenericError(xmlGenericErrorContext,
10351 "PP: try IGNORE\n");break;
10352 }
10353#endif
10354
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010355 if ((ctxt->input != NULL) &&
10356 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010357 xmlSHRINK(ctxt);
10358 ctxt->checkIndex = 0;
10359 }
10360 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010361
Daniel Veillarda880b122003-04-21 21:36:41 +000010362 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010363 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010364 return(0);
10365
10366
Owen Taylor3473f882001-02-23 17:55:21 +000010367 /*
10368 * Pop-up of finished entities.
10369 */
10370 while ((RAW == 0) && (ctxt->inputNr > 1))
10371 xmlPopInput(ctxt);
10372
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010373 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010374 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010375 avail = ctxt->input->length -
10376 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010377 else {
10378 /*
10379 * If we are operating on converted input, try to flush
10380 * remaining chars to avoid them stalling in the non-converted
10381 * buffer.
10382 */
10383 if ((ctxt->input->buf->raw != NULL) &&
10384 (ctxt->input->buf->raw->use > 0)) {
10385 int base = ctxt->input->base -
10386 ctxt->input->buf->buffer->content;
10387 int current = ctxt->input->cur - ctxt->input->base;
10388
10389 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10390 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10391 ctxt->input->cur = ctxt->input->base + current;
10392 ctxt->input->end =
10393 &ctxt->input->buf->buffer->content[
10394 ctxt->input->buf->buffer->use];
10395 }
10396 avail = ctxt->input->buf->buffer->use -
10397 (ctxt->input->cur - ctxt->input->base);
10398 }
Owen Taylor3473f882001-02-23 17:55:21 +000010399 if (avail < 1)
10400 goto done;
10401 switch (ctxt->instate) {
10402 case XML_PARSER_EOF:
10403 /*
10404 * Document parsing is done !
10405 */
10406 goto done;
10407 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010408 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10409 xmlChar start[4];
10410 xmlCharEncoding enc;
10411
10412 /*
10413 * Very first chars read from the document flow.
10414 */
10415 if (avail < 4)
10416 goto done;
10417
10418 /*
10419 * Get the 4 first bytes and decode the charset
10420 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010421 * plug some encoding conversion routines,
10422 * else xmlSwitchEncoding will set to (default)
10423 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010424 */
10425 start[0] = RAW;
10426 start[1] = NXT(1);
10427 start[2] = NXT(2);
10428 start[3] = NXT(3);
10429 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010430 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010431 break;
10432 }
Owen Taylor3473f882001-02-23 17:55:21 +000010433
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010434 if (avail < 2)
10435 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010436 cur = ctxt->input->cur[0];
10437 next = ctxt->input->cur[1];
10438 if (cur == 0) {
10439 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10440 ctxt->sax->setDocumentLocator(ctxt->userData,
10441 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010442 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010443 ctxt->instate = XML_PARSER_EOF;
10444#ifdef DEBUG_PUSH
10445 xmlGenericError(xmlGenericErrorContext,
10446 "PP: entering EOF\n");
10447#endif
10448 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10449 ctxt->sax->endDocument(ctxt->userData);
10450 goto done;
10451 }
10452 if ((cur == '<') && (next == '?')) {
10453 /* PI or XML decl */
10454 if (avail < 5) return(ret);
10455 if ((!terminate) &&
10456 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10457 return(ret);
10458 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10459 ctxt->sax->setDocumentLocator(ctxt->userData,
10460 &xmlDefaultSAXLocator);
10461 if ((ctxt->input->cur[2] == 'x') &&
10462 (ctxt->input->cur[3] == 'm') &&
10463 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010464 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010465 ret += 5;
10466#ifdef DEBUG_PUSH
10467 xmlGenericError(xmlGenericErrorContext,
10468 "PP: Parsing XML Decl\n");
10469#endif
10470 xmlParseXMLDecl(ctxt);
10471 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10472 /*
10473 * The XML REC instructs us to stop parsing right
10474 * here
10475 */
10476 ctxt->instate = XML_PARSER_EOF;
10477 return(0);
10478 }
10479 ctxt->standalone = ctxt->input->standalone;
10480 if ((ctxt->encoding == NULL) &&
10481 (ctxt->input->encoding != NULL))
10482 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10483 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10484 (!ctxt->disableSAX))
10485 ctxt->sax->startDocument(ctxt->userData);
10486 ctxt->instate = XML_PARSER_MISC;
10487#ifdef DEBUG_PUSH
10488 xmlGenericError(xmlGenericErrorContext,
10489 "PP: entering MISC\n");
10490#endif
10491 } else {
10492 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10493 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10494 (!ctxt->disableSAX))
10495 ctxt->sax->startDocument(ctxt->userData);
10496 ctxt->instate = XML_PARSER_MISC;
10497#ifdef DEBUG_PUSH
10498 xmlGenericError(xmlGenericErrorContext,
10499 "PP: entering MISC\n");
10500#endif
10501 }
10502 } else {
10503 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10504 ctxt->sax->setDocumentLocator(ctxt->userData,
10505 &xmlDefaultSAXLocator);
10506 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010507 if (ctxt->version == NULL) {
10508 xmlErrMemory(ctxt, NULL);
10509 break;
10510 }
Owen Taylor3473f882001-02-23 17:55:21 +000010511 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10512 (!ctxt->disableSAX))
10513 ctxt->sax->startDocument(ctxt->userData);
10514 ctxt->instate = XML_PARSER_MISC;
10515#ifdef DEBUG_PUSH
10516 xmlGenericError(xmlGenericErrorContext,
10517 "PP: entering MISC\n");
10518#endif
10519 }
10520 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010521 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010522 const xmlChar *name;
10523 const xmlChar *prefix;
10524 const xmlChar *URI;
10525 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010526
10527 if ((avail < 2) && (ctxt->inputNr == 1))
10528 goto done;
10529 cur = ctxt->input->cur[0];
10530 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010531 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010532 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010533 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10534 ctxt->sax->endDocument(ctxt->userData);
10535 goto done;
10536 }
10537 if (!terminate) {
10538 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010539 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010540 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010541 goto done;
10542 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10543 goto done;
10544 }
10545 }
10546 if (ctxt->spaceNr == 0)
10547 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010548 else if (*ctxt->space == -2)
10549 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010550 else
10551 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010552#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010553 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010554#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010555 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010556#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010557 else
10558 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010559#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010560 if (name == NULL) {
10561 spacePop(ctxt);
10562 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010563 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10564 ctxt->sax->endDocument(ctxt->userData);
10565 goto done;
10566 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010567#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010568 /*
10569 * [ VC: Root Element Type ]
10570 * The Name in the document type declaration must match
10571 * the element type of the root element.
10572 */
10573 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10574 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10575 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010576#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010577
10578 /*
10579 * Check for an Empty Element.
10580 */
10581 if ((RAW == '/') && (NXT(1) == '>')) {
10582 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010583
10584 if (ctxt->sax2) {
10585 if ((ctxt->sax != NULL) &&
10586 (ctxt->sax->endElementNs != NULL) &&
10587 (!ctxt->disableSAX))
10588 ctxt->sax->endElementNs(ctxt->userData, name,
10589 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010590 if (ctxt->nsNr - nsNr > 0)
10591 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010592#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010593 } else {
10594 if ((ctxt->sax != NULL) &&
10595 (ctxt->sax->endElement != NULL) &&
10596 (!ctxt->disableSAX))
10597 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010598#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010599 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010600 spacePop(ctxt);
10601 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010602 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010603 } else {
10604 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010605 }
10606 break;
10607 }
10608 if (RAW == '>') {
10609 NEXT;
10610 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010611 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010612 "Couldn't find end of Start Tag %s\n",
10613 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010614 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010615 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010616 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010617 if (ctxt->sax2)
10618 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010619#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010620 else
10621 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010622#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010623
Daniel Veillarda880b122003-04-21 21:36:41 +000010624 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010625 break;
10626 }
10627 case XML_PARSER_CONTENT: {
10628 const xmlChar *test;
10629 unsigned int cons;
10630 if ((avail < 2) && (ctxt->inputNr == 1))
10631 goto done;
10632 cur = ctxt->input->cur[0];
10633 next = ctxt->input->cur[1];
10634
10635 test = CUR_PTR;
10636 cons = ctxt->input->consumed;
10637 if ((cur == '<') && (next == '/')) {
10638 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010639 break;
10640 } else if ((cur == '<') && (next == '?')) {
10641 if ((!terminate) &&
10642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10643 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010644 xmlParsePI(ctxt);
10645 } else if ((cur == '<') && (next != '!')) {
10646 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010647 break;
10648 } else if ((cur == '<') && (next == '!') &&
10649 (ctxt->input->cur[2] == '-') &&
10650 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010651 int term;
10652
10653 if (avail < 4)
10654 goto done;
10655 ctxt->input->cur += 4;
10656 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10657 ctxt->input->cur -= 4;
10658 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010659 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010660 xmlParseComment(ctxt);
10661 ctxt->instate = XML_PARSER_CONTENT;
10662 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10663 (ctxt->input->cur[2] == '[') &&
10664 (ctxt->input->cur[3] == 'C') &&
10665 (ctxt->input->cur[4] == 'D') &&
10666 (ctxt->input->cur[5] == 'A') &&
10667 (ctxt->input->cur[6] == 'T') &&
10668 (ctxt->input->cur[7] == 'A') &&
10669 (ctxt->input->cur[8] == '[')) {
10670 SKIP(9);
10671 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010672 break;
10673 } else if ((cur == '<') && (next == '!') &&
10674 (avail < 9)) {
10675 goto done;
10676 } else if (cur == '&') {
10677 if ((!terminate) &&
10678 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10679 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010680 xmlParseReference(ctxt);
10681 } else {
10682 /* TODO Avoid the extra copy, handle directly !!! */
10683 /*
10684 * Goal of the following test is:
10685 * - minimize calls to the SAX 'character' callback
10686 * when they are mergeable
10687 * - handle a problem for isBlank when we only parse
10688 * a sequence of blank chars and the next one is
10689 * not available to check against '<' presence.
10690 * - tries to homogenize the differences in SAX
10691 * callbacks between the push and pull versions
10692 * of the parser.
10693 */
10694 if ((ctxt->inputNr == 1) &&
10695 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10696 if (!terminate) {
10697 if (ctxt->progressive) {
10698 if ((lastlt == NULL) ||
10699 (ctxt->input->cur > lastlt))
10700 goto done;
10701 } else if (xmlParseLookupSequence(ctxt,
10702 '<', 0, 0) < 0) {
10703 goto done;
10704 }
10705 }
10706 }
10707 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010708 xmlParseCharData(ctxt, 0);
10709 }
10710 /*
10711 * Pop-up of finished entities.
10712 */
10713 while ((RAW == 0) && (ctxt->inputNr > 1))
10714 xmlPopInput(ctxt);
10715 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010716 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10717 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010718 ctxt->instate = XML_PARSER_EOF;
10719 break;
10720 }
10721 break;
10722 }
10723 case XML_PARSER_END_TAG:
10724 if (avail < 2)
10725 goto done;
10726 if (!terminate) {
10727 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010728 /* > can be found unescaped in attribute values */
10729 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010730 goto done;
10731 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10732 goto done;
10733 }
10734 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010735 if (ctxt->sax2) {
10736 xmlParseEndTag2(ctxt,
10737 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10738 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010739 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010740 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010741 }
10742#ifdef LIBXML_SAX1_ENABLED
10743 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010744 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010745#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010746 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010747 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010748 } else {
10749 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010750 }
10751 break;
10752 case XML_PARSER_CDATA_SECTION: {
10753 /*
10754 * The Push mode needs to have the SAX callback for
10755 * cdataBlock merge back contiguous callbacks.
10756 */
10757 int base;
10758
10759 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10760 if (base < 0) {
10761 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010762 int tmp;
10763
10764 tmp = xmlCheckCdataPush(ctxt->input->cur,
10765 XML_PARSER_BIG_BUFFER_SIZE);
10766 if (tmp < 0) {
10767 tmp = -tmp;
10768 ctxt->input->cur += tmp;
10769 goto encoding_error;
10770 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010771 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10772 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010773 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010774 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010775 else if (ctxt->sax->characters != NULL)
10776 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010777 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010778 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010779 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010780 ctxt->checkIndex = 0;
10781 }
10782 goto done;
10783 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010784 int tmp;
10785
10786 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10787 if ((tmp < 0) || (tmp != base)) {
10788 tmp = -tmp;
10789 ctxt->input->cur += tmp;
10790 goto encoding_error;
10791 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010792 if ((ctxt->sax != NULL) && (base == 0) &&
10793 (ctxt->sax->cdataBlock != NULL) &&
10794 (!ctxt->disableSAX)) {
10795 /*
10796 * Special case to provide identical behaviour
10797 * between pull and push parsers on empty CDATA
10798 * sections
10799 */
10800 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10801 (!strncmp((const char *)&ctxt->input->cur[-9],
10802 "<![CDATA[", 9)))
10803 ctxt->sax->cdataBlock(ctxt->userData,
10804 BAD_CAST "", 0);
10805 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010806 (!ctxt->disableSAX)) {
10807 if (ctxt->sax->cdataBlock != NULL)
10808 ctxt->sax->cdataBlock(ctxt->userData,
10809 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010810 else if (ctxt->sax->characters != NULL)
10811 ctxt->sax->characters(ctxt->userData,
10812 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010813 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010814 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010815 ctxt->checkIndex = 0;
10816 ctxt->instate = XML_PARSER_CONTENT;
10817#ifdef DEBUG_PUSH
10818 xmlGenericError(xmlGenericErrorContext,
10819 "PP: entering CONTENT\n");
10820#endif
10821 }
10822 break;
10823 }
Owen Taylor3473f882001-02-23 17:55:21 +000010824 case XML_PARSER_MISC:
10825 SKIP_BLANKS;
10826 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010827 avail = ctxt->input->length -
10828 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010829 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010830 avail = ctxt->input->buf->buffer->use -
10831 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010832 if (avail < 2)
10833 goto done;
10834 cur = ctxt->input->cur[0];
10835 next = ctxt->input->cur[1];
10836 if ((cur == '<') && (next == '?')) {
10837 if ((!terminate) &&
10838 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10839 goto done;
10840#ifdef DEBUG_PUSH
10841 xmlGenericError(xmlGenericErrorContext,
10842 "PP: Parsing PI\n");
10843#endif
10844 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010845 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010846 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010847 (ctxt->input->cur[2] == '-') &&
10848 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010849 if ((!terminate) &&
10850 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10851 goto done;
10852#ifdef DEBUG_PUSH
10853 xmlGenericError(xmlGenericErrorContext,
10854 "PP: Parsing Comment\n");
10855#endif
10856 xmlParseComment(ctxt);
10857 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010858 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010859 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010860 (ctxt->input->cur[2] == 'D') &&
10861 (ctxt->input->cur[3] == 'O') &&
10862 (ctxt->input->cur[4] == 'C') &&
10863 (ctxt->input->cur[5] == 'T') &&
10864 (ctxt->input->cur[6] == 'Y') &&
10865 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010866 (ctxt->input->cur[8] == 'E')) {
10867 if ((!terminate) &&
10868 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10869 goto done;
10870#ifdef DEBUG_PUSH
10871 xmlGenericError(xmlGenericErrorContext,
10872 "PP: Parsing internal subset\n");
10873#endif
10874 ctxt->inSubset = 1;
10875 xmlParseDocTypeDecl(ctxt);
10876 if (RAW == '[') {
10877 ctxt->instate = XML_PARSER_DTD;
10878#ifdef DEBUG_PUSH
10879 xmlGenericError(xmlGenericErrorContext,
10880 "PP: entering DTD\n");
10881#endif
10882 } else {
10883 /*
10884 * Create and update the external subset.
10885 */
10886 ctxt->inSubset = 2;
10887 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10888 (ctxt->sax->externalSubset != NULL))
10889 ctxt->sax->externalSubset(ctxt->userData,
10890 ctxt->intSubName, ctxt->extSubSystem,
10891 ctxt->extSubURI);
10892 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010893 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010894 ctxt->instate = XML_PARSER_PROLOG;
10895#ifdef DEBUG_PUSH
10896 xmlGenericError(xmlGenericErrorContext,
10897 "PP: entering PROLOG\n");
10898#endif
10899 }
10900 } else if ((cur == '<') && (next == '!') &&
10901 (avail < 9)) {
10902 goto done;
10903 } else {
10904 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010905 ctxt->progressive = 1;
10906 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010907#ifdef DEBUG_PUSH
10908 xmlGenericError(xmlGenericErrorContext,
10909 "PP: entering START_TAG\n");
10910#endif
10911 }
10912 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010913 case XML_PARSER_PROLOG:
10914 SKIP_BLANKS;
10915 if (ctxt->input->buf == NULL)
10916 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10917 else
10918 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10919 if (avail < 2)
10920 goto done;
10921 cur = ctxt->input->cur[0];
10922 next = ctxt->input->cur[1];
10923 if ((cur == '<') && (next == '?')) {
10924 if ((!terminate) &&
10925 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10926 goto done;
10927#ifdef DEBUG_PUSH
10928 xmlGenericError(xmlGenericErrorContext,
10929 "PP: Parsing PI\n");
10930#endif
10931 xmlParsePI(ctxt);
10932 } else if ((cur == '<') && (next == '!') &&
10933 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10934 if ((!terminate) &&
10935 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10936 goto done;
10937#ifdef DEBUG_PUSH
10938 xmlGenericError(xmlGenericErrorContext,
10939 "PP: Parsing Comment\n");
10940#endif
10941 xmlParseComment(ctxt);
10942 ctxt->instate = XML_PARSER_PROLOG;
10943 } else if ((cur == '<') && (next == '!') &&
10944 (avail < 4)) {
10945 goto done;
10946 } else {
10947 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010948 if (ctxt->progressive == 0)
10949 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010950 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010951#ifdef DEBUG_PUSH
10952 xmlGenericError(xmlGenericErrorContext,
10953 "PP: entering START_TAG\n");
10954#endif
10955 }
10956 break;
10957 case XML_PARSER_EPILOG:
10958 SKIP_BLANKS;
10959 if (ctxt->input->buf == NULL)
10960 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10961 else
10962 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10963 if (avail < 2)
10964 goto done;
10965 cur = ctxt->input->cur[0];
10966 next = ctxt->input->cur[1];
10967 if ((cur == '<') && (next == '?')) {
10968 if ((!terminate) &&
10969 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10970 goto done;
10971#ifdef DEBUG_PUSH
10972 xmlGenericError(xmlGenericErrorContext,
10973 "PP: Parsing PI\n");
10974#endif
10975 xmlParsePI(ctxt);
10976 ctxt->instate = XML_PARSER_EPILOG;
10977 } else if ((cur == '<') && (next == '!') &&
10978 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10979 if ((!terminate) &&
10980 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10981 goto done;
10982#ifdef DEBUG_PUSH
10983 xmlGenericError(xmlGenericErrorContext,
10984 "PP: Parsing Comment\n");
10985#endif
10986 xmlParseComment(ctxt);
10987 ctxt->instate = XML_PARSER_EPILOG;
10988 } else if ((cur == '<') && (next == '!') &&
10989 (avail < 4)) {
10990 goto done;
10991 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010992 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010993 ctxt->instate = XML_PARSER_EOF;
10994#ifdef DEBUG_PUSH
10995 xmlGenericError(xmlGenericErrorContext,
10996 "PP: entering EOF\n");
10997#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010998 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010999 ctxt->sax->endDocument(ctxt->userData);
11000 goto done;
11001 }
11002 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011003 case XML_PARSER_DTD: {
11004 /*
11005 * Sorry but progressive parsing of the internal subset
11006 * is not expected to be supported. We first check that
11007 * the full content of the internal subset is available and
11008 * the parsing is launched only at that point.
11009 * Internal subset ends up with "']' S? '>'" in an unescaped
11010 * section and not in a ']]>' sequence which are conditional
11011 * sections (whoever argued to keep that crap in XML deserve
11012 * a place in hell !).
11013 */
11014 int base, i;
11015 xmlChar *buf;
11016 xmlChar quote = 0;
11017
11018 base = ctxt->input->cur - ctxt->input->base;
11019 if (base < 0) return(0);
11020 if (ctxt->checkIndex > base)
11021 base = ctxt->checkIndex;
11022 buf = ctxt->input->buf->buffer->content;
11023 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11024 base++) {
11025 if (quote != 0) {
11026 if (buf[base] == quote)
11027 quote = 0;
11028 continue;
11029 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011030 if ((quote == 0) && (buf[base] == '<')) {
11031 int found = 0;
11032 /* special handling of comments */
11033 if (((unsigned int) base + 4 <
11034 ctxt->input->buf->buffer->use) &&
11035 (buf[base + 1] == '!') &&
11036 (buf[base + 2] == '-') &&
11037 (buf[base + 3] == '-')) {
11038 for (;(unsigned int) base + 3 <
11039 ctxt->input->buf->buffer->use; base++) {
11040 if ((buf[base] == '-') &&
11041 (buf[base + 1] == '-') &&
11042 (buf[base + 2] == '>')) {
11043 found = 1;
11044 base += 2;
11045 break;
11046 }
11047 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011048 if (!found) {
11049#if 0
11050 fprintf(stderr, "unfinished comment\n");
11051#endif
11052 break; /* for */
11053 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011054 continue;
11055 }
11056 }
Owen Taylor3473f882001-02-23 17:55:21 +000011057 if (buf[base] == '"') {
11058 quote = '"';
11059 continue;
11060 }
11061 if (buf[base] == '\'') {
11062 quote = '\'';
11063 continue;
11064 }
11065 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011066#if 0
11067 fprintf(stderr, "%c%c%c%c: ", buf[base],
11068 buf[base + 1], buf[base + 2], buf[base + 3]);
11069#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011070 if ((unsigned int) base +1 >=
11071 ctxt->input->buf->buffer->use)
11072 break;
11073 if (buf[base + 1] == ']') {
11074 /* conditional crap, skip both ']' ! */
11075 base++;
11076 continue;
11077 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011078 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011079 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11080 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011081 if (buf[base + i] == '>') {
11082#if 0
11083 fprintf(stderr, "found\n");
11084#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011085 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011086 }
11087 if (!IS_BLANK_CH(buf[base + i])) {
11088#if 0
11089 fprintf(stderr, "not found\n");
11090#endif
11091 goto not_end_of_int_subset;
11092 }
Owen Taylor3473f882001-02-23 17:55:21 +000011093 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011094#if 0
11095 fprintf(stderr, "end of stream\n");
11096#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011097 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011098
Owen Taylor3473f882001-02-23 17:55:21 +000011099 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011100not_end_of_int_subset:
11101 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011102 }
11103 /*
11104 * We didn't find the end of the Internal subset
11105 */
Owen Taylor3473f882001-02-23 17:55:21 +000011106#ifdef DEBUG_PUSH
11107 if (next == 0)
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: lookup of int subset end filed\n");
11110#endif
11111 goto done;
11112
11113found_end_int_subset:
11114 xmlParseInternalSubset(ctxt);
11115 ctxt->inSubset = 2;
11116 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11117 (ctxt->sax->externalSubset != NULL))
11118 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11119 ctxt->extSubSystem, ctxt->extSubURI);
11120 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011121 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011122 ctxt->instate = XML_PARSER_PROLOG;
11123 ctxt->checkIndex = 0;
11124#ifdef DEBUG_PUSH
11125 xmlGenericError(xmlGenericErrorContext,
11126 "PP: entering PROLOG\n");
11127#endif
11128 break;
11129 }
11130 case XML_PARSER_COMMENT:
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: internal error, state == COMMENT\n");
11133 ctxt->instate = XML_PARSER_CONTENT;
11134#ifdef DEBUG_PUSH
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: entering CONTENT\n");
11137#endif
11138 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011139 case XML_PARSER_IGNORE:
11140 xmlGenericError(xmlGenericErrorContext,
11141 "PP: internal error, state == IGNORE");
11142 ctxt->instate = XML_PARSER_DTD;
11143#ifdef DEBUG_PUSH
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: entering DTD\n");
11146#endif
11147 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011148 case XML_PARSER_PI:
11149 xmlGenericError(xmlGenericErrorContext,
11150 "PP: internal error, state == PI\n");
11151 ctxt->instate = XML_PARSER_CONTENT;
11152#ifdef DEBUG_PUSH
11153 xmlGenericError(xmlGenericErrorContext,
11154 "PP: entering CONTENT\n");
11155#endif
11156 break;
11157 case XML_PARSER_ENTITY_DECL:
11158 xmlGenericError(xmlGenericErrorContext,
11159 "PP: internal error, state == ENTITY_DECL\n");
11160 ctxt->instate = XML_PARSER_DTD;
11161#ifdef DEBUG_PUSH
11162 xmlGenericError(xmlGenericErrorContext,
11163 "PP: entering DTD\n");
11164#endif
11165 break;
11166 case XML_PARSER_ENTITY_VALUE:
11167 xmlGenericError(xmlGenericErrorContext,
11168 "PP: internal error, state == ENTITY_VALUE\n");
11169 ctxt->instate = XML_PARSER_CONTENT;
11170#ifdef DEBUG_PUSH
11171 xmlGenericError(xmlGenericErrorContext,
11172 "PP: entering DTD\n");
11173#endif
11174 break;
11175 case XML_PARSER_ATTRIBUTE_VALUE:
11176 xmlGenericError(xmlGenericErrorContext,
11177 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11178 ctxt->instate = XML_PARSER_START_TAG;
11179#ifdef DEBUG_PUSH
11180 xmlGenericError(xmlGenericErrorContext,
11181 "PP: entering START_TAG\n");
11182#endif
11183 break;
11184 case XML_PARSER_SYSTEM_LITERAL:
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: internal error, state == SYSTEM_LITERAL\n");
11187 ctxt->instate = XML_PARSER_START_TAG;
11188#ifdef DEBUG_PUSH
11189 xmlGenericError(xmlGenericErrorContext,
11190 "PP: entering START_TAG\n");
11191#endif
11192 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011193 case XML_PARSER_PUBLIC_LITERAL:
11194 xmlGenericError(xmlGenericErrorContext,
11195 "PP: internal error, state == PUBLIC_LITERAL\n");
11196 ctxt->instate = XML_PARSER_START_TAG;
11197#ifdef DEBUG_PUSH
11198 xmlGenericError(xmlGenericErrorContext,
11199 "PP: entering START_TAG\n");
11200#endif
11201 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011202 }
11203 }
11204done:
11205#ifdef DEBUG_PUSH
11206 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11207#endif
11208 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011209encoding_error:
11210 {
11211 char buffer[150];
11212
11213 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11214 ctxt->input->cur[0], ctxt->input->cur[1],
11215 ctxt->input->cur[2], ctxt->input->cur[3]);
11216 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11217 "Input is not proper UTF-8, indicate encoding !\n%s",
11218 BAD_CAST buffer, NULL);
11219 }
11220 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011221}
11222
11223/**
Owen Taylor3473f882001-02-23 17:55:21 +000011224 * xmlParseChunk:
11225 * @ctxt: an XML parser context
11226 * @chunk: an char array
11227 * @size: the size in byte of the chunk
11228 * @terminate: last chunk indicator
11229 *
11230 * Parse a Chunk of memory
11231 *
11232 * Returns zero if no error, the xmlParserErrors otherwise.
11233 */
11234int
11235xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11236 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011237 int end_in_lf = 0;
11238
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011239 if (ctxt == NULL)
11240 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011241 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011242 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011243 if (ctxt->instate == XML_PARSER_START)
11244 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011245 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11246 (chunk[size - 1] == '\r')) {
11247 end_in_lf = 1;
11248 size--;
11249 }
Owen Taylor3473f882001-02-23 17:55:21 +000011250 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11251 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11252 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11253 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011254 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011255
William M. Bracka3215c72004-07-31 16:24:01 +000011256 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11257 if (res < 0) {
11258 ctxt->errNo = XML_PARSER_EOF;
11259 ctxt->disableSAX = 1;
11260 return (XML_PARSER_EOF);
11261 }
Owen Taylor3473f882001-02-23 17:55:21 +000011262 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11263 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011264 ctxt->input->end =
11265 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011266#ifdef DEBUG_PUSH
11267 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11268#endif
11269
Owen Taylor3473f882001-02-23 17:55:21 +000011270 } else if (ctxt->instate != XML_PARSER_EOF) {
11271 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11272 xmlParserInputBufferPtr in = ctxt->input->buf;
11273 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11274 (in->raw != NULL)) {
11275 int nbchars;
11276
11277 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11278 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011279 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011280 xmlGenericError(xmlGenericErrorContext,
11281 "xmlParseChunk: encoder error\n");
11282 return(XML_ERR_INVALID_ENCODING);
11283 }
11284 }
11285 }
11286 }
11287 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011288 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11289 (ctxt->input->buf != NULL)) {
11290 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11291 }
Daniel Veillard14412512005-01-21 23:53:26 +000011292 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011293 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011294 if (terminate) {
11295 /*
11296 * Check for termination
11297 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011298 int avail = 0;
11299
11300 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011301 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011302 avail = ctxt->input->length -
11303 (ctxt->input->cur - ctxt->input->base);
11304 else
11305 avail = ctxt->input->buf->buffer->use -
11306 (ctxt->input->cur - ctxt->input->base);
11307 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011308
Owen Taylor3473f882001-02-23 17:55:21 +000011309 if ((ctxt->instate != XML_PARSER_EOF) &&
11310 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011311 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011312 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011313 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011314 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011315 }
Owen Taylor3473f882001-02-23 17:55:21 +000011316 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011317 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011318 ctxt->sax->endDocument(ctxt->userData);
11319 }
11320 ctxt->instate = XML_PARSER_EOF;
11321 }
11322 return((xmlParserErrors) ctxt->errNo);
11323}
11324
11325/************************************************************************
11326 * *
11327 * I/O front end functions to the parser *
11328 * *
11329 ************************************************************************/
11330
11331/**
Owen Taylor3473f882001-02-23 17:55:21 +000011332 * xmlCreatePushParserCtxt:
11333 * @sax: a SAX handler
11334 * @user_data: The user data returned on SAX callbacks
11335 * @chunk: a pointer to an array of chars
11336 * @size: number of chars in the array
11337 * @filename: an optional file name or URI
11338 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011339 * Create a parser context for using the XML parser in push mode.
11340 * If @buffer and @size are non-NULL, the data is used to detect
11341 * the encoding. The remaining characters will be parsed so they
11342 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011343 * To allow content encoding detection, @size should be >= 4
11344 * The value of @filename is used for fetching external entities
11345 * and error/warning reports.
11346 *
11347 * Returns the new parser context or NULL
11348 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011349
Owen Taylor3473f882001-02-23 17:55:21 +000011350xmlParserCtxtPtr
11351xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11352 const char *chunk, int size, const char *filename) {
11353 xmlParserCtxtPtr ctxt;
11354 xmlParserInputPtr inputStream;
11355 xmlParserInputBufferPtr buf;
11356 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11357
11358 /*
11359 * plug some encoding conversion routines
11360 */
11361 if ((chunk != NULL) && (size >= 4))
11362 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11363
11364 buf = xmlAllocParserInputBuffer(enc);
11365 if (buf == NULL) return(NULL);
11366
11367 ctxt = xmlNewParserCtxt();
11368 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011369 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011370 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011371 return(NULL);
11372 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011373 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011374 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11375 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011376 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011377 xmlFreeParserInputBuffer(buf);
11378 xmlFreeParserCtxt(ctxt);
11379 return(NULL);
11380 }
Owen Taylor3473f882001-02-23 17:55:21 +000011381 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011382#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011383 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011384#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011385 xmlFree(ctxt->sax);
11386 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11387 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011388 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011389 xmlFreeParserInputBuffer(buf);
11390 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011391 return(NULL);
11392 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011393 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11394 if (sax->initialized == XML_SAX2_MAGIC)
11395 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11396 else
11397 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011398 if (user_data != NULL)
11399 ctxt->userData = user_data;
11400 }
11401 if (filename == NULL) {
11402 ctxt->directory = NULL;
11403 } else {
11404 ctxt->directory = xmlParserGetDirectory(filename);
11405 }
11406
11407 inputStream = xmlNewInputStream(ctxt);
11408 if (inputStream == NULL) {
11409 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011410 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011411 return(NULL);
11412 }
11413
11414 if (filename == NULL)
11415 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011416 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011417 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011418 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011419 if (inputStream->filename == NULL) {
11420 xmlFreeParserCtxt(ctxt);
11421 xmlFreeParserInputBuffer(buf);
11422 return(NULL);
11423 }
11424 }
Owen Taylor3473f882001-02-23 17:55:21 +000011425 inputStream->buf = buf;
11426 inputStream->base = inputStream->buf->buffer->content;
11427 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011428 inputStream->end =
11429 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011430
11431 inputPush(ctxt, inputStream);
11432
William M. Brack3a1cd212005-02-11 14:35:54 +000011433 /*
11434 * If the caller didn't provide an initial 'chunk' for determining
11435 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11436 * that it can be automatically determined later
11437 */
11438 if ((size == 0) || (chunk == NULL)) {
11439 ctxt->charset = XML_CHAR_ENCODING_NONE;
11440 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011441 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11442 int cur = ctxt->input->cur - ctxt->input->base;
11443
Owen Taylor3473f882001-02-23 17:55:21 +000011444 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011445
11446 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11447 ctxt->input->cur = ctxt->input->base + cur;
11448 ctxt->input->end =
11449 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011450#ifdef DEBUG_PUSH
11451 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11452#endif
11453 }
11454
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011455 if (enc != XML_CHAR_ENCODING_NONE) {
11456 xmlSwitchEncoding(ctxt, enc);
11457 }
11458
Owen Taylor3473f882001-02-23 17:55:21 +000011459 return(ctxt);
11460}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011461#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011462
11463/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011464 * xmlStopParser:
11465 * @ctxt: an XML parser context
11466 *
11467 * Blocks further parser processing
11468 */
11469void
11470xmlStopParser(xmlParserCtxtPtr ctxt) {
11471 if (ctxt == NULL)
11472 return;
11473 ctxt->instate = XML_PARSER_EOF;
11474 ctxt->disableSAX = 1;
11475 if (ctxt->input != NULL) {
11476 ctxt->input->cur = BAD_CAST"";
11477 ctxt->input->base = ctxt->input->cur;
11478 }
11479}
11480
11481/**
Owen Taylor3473f882001-02-23 17:55:21 +000011482 * xmlCreateIOParserCtxt:
11483 * @sax: a SAX handler
11484 * @user_data: The user data returned on SAX callbacks
11485 * @ioread: an I/O read function
11486 * @ioclose: an I/O close function
11487 * @ioctx: an I/O handler
11488 * @enc: the charset encoding if known
11489 *
11490 * Create a parser context for using the XML parser with an existing
11491 * I/O stream
11492 *
11493 * Returns the new parser context or NULL
11494 */
11495xmlParserCtxtPtr
11496xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11497 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11498 void *ioctx, xmlCharEncoding enc) {
11499 xmlParserCtxtPtr ctxt;
11500 xmlParserInputPtr inputStream;
11501 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011502
11503 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011504
11505 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11506 if (buf == NULL) return(NULL);
11507
11508 ctxt = xmlNewParserCtxt();
11509 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011510 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011511 return(NULL);
11512 }
11513 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011514#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011515 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011516#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011517 xmlFree(ctxt->sax);
11518 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11519 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011520 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011521 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011522 return(NULL);
11523 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011524 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11525 if (sax->initialized == XML_SAX2_MAGIC)
11526 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11527 else
11528 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011529 if (user_data != NULL)
11530 ctxt->userData = user_data;
11531 }
11532
11533 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11534 if (inputStream == NULL) {
11535 xmlFreeParserCtxt(ctxt);
11536 return(NULL);
11537 }
11538 inputPush(ctxt, inputStream);
11539
11540 return(ctxt);
11541}
11542
Daniel Veillard4432df22003-09-28 18:58:27 +000011543#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011544/************************************************************************
11545 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011546 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011547 * *
11548 ************************************************************************/
11549
11550/**
11551 * xmlIOParseDTD:
11552 * @sax: the SAX handler block or NULL
11553 * @input: an Input Buffer
11554 * @enc: the charset encoding if known
11555 *
11556 * Load and parse a DTD
11557 *
11558 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011559 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011560 */
11561
11562xmlDtdPtr
11563xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11564 xmlCharEncoding enc) {
11565 xmlDtdPtr ret = NULL;
11566 xmlParserCtxtPtr ctxt;
11567 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011568 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011569
11570 if (input == NULL)
11571 return(NULL);
11572
11573 ctxt = xmlNewParserCtxt();
11574 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011575 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011576 return(NULL);
11577 }
11578
11579 /*
11580 * Set-up the SAX context
11581 */
11582 if (sax != NULL) {
11583 if (ctxt->sax != NULL)
11584 xmlFree(ctxt->sax);
11585 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011586 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011587 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011588 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011589
11590 /*
11591 * generate a parser input from the I/O handler
11592 */
11593
Daniel Veillard43caefb2003-12-07 19:32:22 +000011594 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011595 if (pinput == NULL) {
11596 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011597 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011598 xmlFreeParserCtxt(ctxt);
11599 return(NULL);
11600 }
11601
11602 /*
11603 * plug some encoding conversion routines here.
11604 */
11605 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011606 if (enc != XML_CHAR_ENCODING_NONE) {
11607 xmlSwitchEncoding(ctxt, enc);
11608 }
Owen Taylor3473f882001-02-23 17:55:21 +000011609
11610 pinput->filename = NULL;
11611 pinput->line = 1;
11612 pinput->col = 1;
11613 pinput->base = ctxt->input->cur;
11614 pinput->cur = ctxt->input->cur;
11615 pinput->free = NULL;
11616
11617 /*
11618 * let's parse that entity knowing it's an external subset.
11619 */
11620 ctxt->inSubset = 2;
11621 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011622 if (ctxt->myDoc == NULL) {
11623 xmlErrMemory(ctxt, "New Doc failed");
11624 return(NULL);
11625 }
11626 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011627 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11628 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011629
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011630 if ((enc == XML_CHAR_ENCODING_NONE) &&
11631 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011632 /*
11633 * Get the 4 first bytes and decode the charset
11634 * if enc != XML_CHAR_ENCODING_NONE
11635 * plug some encoding conversion routines.
11636 */
11637 start[0] = RAW;
11638 start[1] = NXT(1);
11639 start[2] = NXT(2);
11640 start[3] = NXT(3);
11641 enc = xmlDetectCharEncoding(start, 4);
11642 if (enc != XML_CHAR_ENCODING_NONE) {
11643 xmlSwitchEncoding(ctxt, enc);
11644 }
11645 }
11646
Owen Taylor3473f882001-02-23 17:55:21 +000011647 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11648
11649 if (ctxt->myDoc != NULL) {
11650 if (ctxt->wellFormed) {
11651 ret = ctxt->myDoc->extSubset;
11652 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011653 if (ret != NULL) {
11654 xmlNodePtr tmp;
11655
11656 ret->doc = NULL;
11657 tmp = ret->children;
11658 while (tmp != NULL) {
11659 tmp->doc = NULL;
11660 tmp = tmp->next;
11661 }
11662 }
Owen Taylor3473f882001-02-23 17:55:21 +000011663 } else {
11664 ret = NULL;
11665 }
11666 xmlFreeDoc(ctxt->myDoc);
11667 ctxt->myDoc = NULL;
11668 }
11669 if (sax != NULL) ctxt->sax = NULL;
11670 xmlFreeParserCtxt(ctxt);
11671
11672 return(ret);
11673}
11674
11675/**
11676 * xmlSAXParseDTD:
11677 * @sax: the SAX handler block
11678 * @ExternalID: a NAME* containing the External ID of the DTD
11679 * @SystemID: a NAME* containing the URL to the DTD
11680 *
11681 * Load and parse an external subset.
11682 *
11683 * Returns the resulting xmlDtdPtr or NULL in case of error.
11684 */
11685
11686xmlDtdPtr
11687xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11688 const xmlChar *SystemID) {
11689 xmlDtdPtr ret = NULL;
11690 xmlParserCtxtPtr ctxt;
11691 xmlParserInputPtr input = NULL;
11692 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011693 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011694
11695 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11696
11697 ctxt = xmlNewParserCtxt();
11698 if (ctxt == NULL) {
11699 return(NULL);
11700 }
11701
11702 /*
11703 * Set-up the SAX context
11704 */
11705 if (sax != NULL) {
11706 if (ctxt->sax != NULL)
11707 xmlFree(ctxt->sax);
11708 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011709 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011710 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011711
11712 /*
11713 * Canonicalise the system ID
11714 */
11715 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011716 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011717 xmlFreeParserCtxt(ctxt);
11718 return(NULL);
11719 }
Owen Taylor3473f882001-02-23 17:55:21 +000011720
11721 /*
11722 * Ask the Entity resolver to load the damn thing
11723 */
11724
11725 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011726 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11727 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011728 if (input == NULL) {
11729 if (sax != NULL) ctxt->sax = NULL;
11730 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011731 if (systemIdCanonic != NULL)
11732 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011733 return(NULL);
11734 }
11735
11736 /*
11737 * plug some encoding conversion routines here.
11738 */
11739 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011740 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11741 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11742 xmlSwitchEncoding(ctxt, enc);
11743 }
Owen Taylor3473f882001-02-23 17:55:21 +000011744
11745 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011746 input->filename = (char *) systemIdCanonic;
11747 else
11748 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011749 input->line = 1;
11750 input->col = 1;
11751 input->base = ctxt->input->cur;
11752 input->cur = ctxt->input->cur;
11753 input->free = NULL;
11754
11755 /*
11756 * let's parse that entity knowing it's an external subset.
11757 */
11758 ctxt->inSubset = 2;
11759 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011760 if (ctxt->myDoc == NULL) {
11761 xmlErrMemory(ctxt, "New Doc failed");
11762 if (sax != NULL) ctxt->sax = NULL;
11763 xmlFreeParserCtxt(ctxt);
11764 return(NULL);
11765 }
11766 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011767 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11768 ExternalID, SystemID);
11769 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11770
11771 if (ctxt->myDoc != NULL) {
11772 if (ctxt->wellFormed) {
11773 ret = ctxt->myDoc->extSubset;
11774 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011775 if (ret != NULL) {
11776 xmlNodePtr tmp;
11777
11778 ret->doc = NULL;
11779 tmp = ret->children;
11780 while (tmp != NULL) {
11781 tmp->doc = NULL;
11782 tmp = tmp->next;
11783 }
11784 }
Owen Taylor3473f882001-02-23 17:55:21 +000011785 } else {
11786 ret = NULL;
11787 }
11788 xmlFreeDoc(ctxt->myDoc);
11789 ctxt->myDoc = NULL;
11790 }
11791 if (sax != NULL) ctxt->sax = NULL;
11792 xmlFreeParserCtxt(ctxt);
11793
11794 return(ret);
11795}
11796
Daniel Veillard4432df22003-09-28 18:58:27 +000011797
Owen Taylor3473f882001-02-23 17:55:21 +000011798/**
11799 * xmlParseDTD:
11800 * @ExternalID: a NAME* containing the External ID of the DTD
11801 * @SystemID: a NAME* containing the URL to the DTD
11802 *
11803 * Load and parse an external subset.
11804 *
11805 * Returns the resulting xmlDtdPtr or NULL in case of error.
11806 */
11807
11808xmlDtdPtr
11809xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11810 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11811}
Daniel Veillard4432df22003-09-28 18:58:27 +000011812#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011813
11814/************************************************************************
11815 * *
11816 * Front ends when parsing an Entity *
11817 * *
11818 ************************************************************************/
11819
11820/**
Owen Taylor3473f882001-02-23 17:55:21 +000011821 * xmlParseCtxtExternalEntity:
11822 * @ctx: the existing parsing context
11823 * @URL: the URL for the entity to load
11824 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011825 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011826 *
11827 * Parse an external general entity within an existing parsing context
11828 * An external general parsed entity is well-formed if it matches the
11829 * production labeled extParsedEnt.
11830 *
11831 * [78] extParsedEnt ::= TextDecl? content
11832 *
11833 * Returns 0 if the entity is well formed, -1 in case of args problem and
11834 * the parser error code otherwise
11835 */
11836
11837int
11838xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011839 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011840 xmlParserCtxtPtr ctxt;
11841 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011842 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011843 xmlSAXHandlerPtr oldsax = NULL;
11844 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011845 xmlChar start[4];
11846 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011847 xmlParserInputPtr inputStream;
11848 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011849
Daniel Veillardce682bc2004-11-05 17:22:25 +000011850 if (ctx == NULL) return(-1);
11851
Daniel Veillard8915c152008-08-26 13:05:34 +000011852 if (((ctx->depth > 20) || (ctx->nbentities >= 100000)) &&
11853 ((ctx->options & XML_PARSE_HUGE) == 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +000011854 return(XML_ERR_ENTITY_LOOP);
11855 }
11856
Daniel Veillardcda96922001-08-21 10:56:31 +000011857 if (lst != NULL)
11858 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011859 if ((URL == NULL) && (ID == NULL))
11860 return(-1);
11861 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11862 return(-1);
11863
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011864 ctxt = xmlNewParserCtxt();
11865 if (ctxt == NULL) {
11866 return(-1);
11867 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011868
Owen Taylor3473f882001-02-23 17:55:21 +000011869 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011870 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011871
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011872 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11873 if (inputStream == NULL) {
11874 xmlFreeParserCtxt(ctxt);
11875 return(-1);
11876 }
11877
11878 inputPush(ctxt, inputStream);
11879
11880 if ((ctxt->directory == NULL) && (directory == NULL))
11881 directory = xmlParserGetDirectory((char *)URL);
11882 if ((ctxt->directory == NULL) && (directory != NULL))
11883 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011884
Owen Taylor3473f882001-02-23 17:55:21 +000011885 oldsax = ctxt->sax;
11886 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011887 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011888 newDoc = xmlNewDoc(BAD_CAST "1.0");
11889 if (newDoc == NULL) {
11890 xmlFreeParserCtxt(ctxt);
11891 return(-1);
11892 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000011893 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011894 if (ctx->myDoc->dict) {
11895 newDoc->dict = ctx->myDoc->dict;
11896 xmlDictReference(newDoc->dict);
11897 }
Owen Taylor3473f882001-02-23 17:55:21 +000011898 if (ctx->myDoc != NULL) {
11899 newDoc->intSubset = ctx->myDoc->intSubset;
11900 newDoc->extSubset = ctx->myDoc->extSubset;
11901 }
11902 if (ctx->myDoc->URL != NULL) {
11903 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11904 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011905 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11906 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011907 ctxt->sax = oldsax;
11908 xmlFreeParserCtxt(ctxt);
11909 newDoc->intSubset = NULL;
11910 newDoc->extSubset = NULL;
11911 xmlFreeDoc(newDoc);
11912 return(-1);
11913 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011914 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011915 nodePush(ctxt, newDoc->children);
11916 if (ctx->myDoc == NULL) {
11917 ctxt->myDoc = newDoc;
11918 } else {
11919 ctxt->myDoc = ctx->myDoc;
11920 newDoc->children->doc = ctx->myDoc;
11921 }
11922
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011923 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000011924 * Get the 4 first bytes and decode the charset
11925 * if enc != XML_CHAR_ENCODING_NONE
11926 * plug some encoding conversion routines.
11927 */
11928 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011929 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11930 start[0] = RAW;
11931 start[1] = NXT(1);
11932 start[2] = NXT(2);
11933 start[3] = NXT(3);
11934 enc = xmlDetectCharEncoding(start, 4);
11935 if (enc != XML_CHAR_ENCODING_NONE) {
11936 xmlSwitchEncoding(ctxt, enc);
11937 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011938 }
11939
Owen Taylor3473f882001-02-23 17:55:21 +000011940 /*
11941 * Parse a possible text declaration first
11942 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011943 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011944 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011945 /*
11946 * An XML-1.0 document can't reference an entity not XML-1.0
11947 */
11948 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
11949 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11950 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11951 "Version mismatch between document and entity\n");
11952 }
Owen Taylor3473f882001-02-23 17:55:21 +000011953 }
11954
11955 /*
11956 * Doing validity checking on chunk doesn't make sense
11957 */
11958 ctxt->instate = XML_PARSER_CONTENT;
11959 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011960 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011961 ctxt->loadsubset = ctx->loadsubset;
11962 ctxt->depth = ctx->depth + 1;
11963 ctxt->replaceEntities = ctx->replaceEntities;
11964 if (ctxt->validate) {
11965 ctxt->vctxt.error = ctx->vctxt.error;
11966 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011967 } else {
11968 ctxt->vctxt.error = NULL;
11969 ctxt->vctxt.warning = NULL;
11970 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011971 ctxt->vctxt.nodeTab = NULL;
11972 ctxt->vctxt.nodeNr = 0;
11973 ctxt->vctxt.nodeMax = 0;
11974 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011975 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11976 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011977 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11978 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11979 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011980 ctxt->dictNames = ctx->dictNames;
11981 ctxt->attsDefault = ctx->attsDefault;
11982 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011983 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011984
11985 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011986
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011987 ctx->validate = ctxt->validate;
11988 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011989 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011990 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011991 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011992 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011993 }
11994 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011995 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011996 }
11997
11998 if (!ctxt->wellFormed) {
11999 if (ctxt->errNo == 0)
12000 ret = 1;
12001 else
12002 ret = ctxt->errNo;
12003 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012004 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012005 xmlNodePtr cur;
12006
12007 /*
12008 * Return the newly created nodeset after unlinking it from
12009 * they pseudo parent.
12010 */
12011 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012012 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012013 while (cur != NULL) {
12014 cur->parent = NULL;
12015 cur = cur->next;
12016 }
12017 newDoc->children->children = NULL;
12018 }
12019 ret = 0;
12020 }
12021 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012022 ctxt->dict = NULL;
12023 ctxt->attsDefault = NULL;
12024 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012025 xmlFreeParserCtxt(ctxt);
12026 newDoc->intSubset = NULL;
12027 newDoc->extSubset = NULL;
12028 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012029
Owen Taylor3473f882001-02-23 17:55:21 +000012030 return(ret);
12031}
12032
12033/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012034 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012035 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012036 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012037 * @sax: the SAX handler bloc (possibly NULL)
12038 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12039 * @depth: Used for loop detection, use 0
12040 * @URL: the URL for the entity to load
12041 * @ID: the System ID for the entity to load
12042 * @list: the return value for the set of parsed nodes
12043 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012044 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012045 *
12046 * Returns 0 if the entity is well formed, -1 in case of args problem and
12047 * the parser error code otherwise
12048 */
12049
Daniel Veillard7d515752003-09-26 19:12:37 +000012050static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012051xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12052 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012053 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012054 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012055 xmlParserCtxtPtr ctxt;
12056 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012057 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012058 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012059 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012060 xmlChar start[4];
12061 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012062
Daniel Veillard8915c152008-08-26 13:05:34 +000012063 if (((depth > 20) ||
12064 ((oldctxt != NULL) && (oldctxt->nbentities >= 100000))) &&
12065 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012066 return(XML_ERR_ENTITY_LOOP);
12067 }
12068
Owen Taylor3473f882001-02-23 17:55:21 +000012069 if (list != NULL)
12070 *list = NULL;
12071 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012072 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012073 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012074 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012075
12076
12077 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012078 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012079 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012080 if (oldctxt != NULL) {
12081 ctxt->_private = oldctxt->_private;
12082 ctxt->loadsubset = oldctxt->loadsubset;
12083 ctxt->validate = oldctxt->validate;
12084 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012085 ctxt->record_info = oldctxt->record_info;
12086 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12087 ctxt->node_seq.length = oldctxt->node_seq.length;
12088 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012089 } else {
12090 /*
12091 * Doing validity checking on chunk without context
12092 * doesn't make sense
12093 */
12094 ctxt->_private = NULL;
12095 ctxt->validate = 0;
12096 ctxt->external = 2;
12097 ctxt->loadsubset = 0;
12098 }
Owen Taylor3473f882001-02-23 17:55:21 +000012099 if (sax != NULL) {
12100 oldsax = ctxt->sax;
12101 ctxt->sax = sax;
12102 if (user_data != NULL)
12103 ctxt->userData = user_data;
12104 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012105 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012106 newDoc = xmlNewDoc(BAD_CAST "1.0");
12107 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012108 ctxt->node_seq.maximum = 0;
12109 ctxt->node_seq.length = 0;
12110 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012111 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012112 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012113 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012114 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012115 newDoc->intSubset = doc->intSubset;
12116 newDoc->extSubset = doc->extSubset;
12117 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012118 xmlDictReference(newDoc->dict);
12119
Owen Taylor3473f882001-02-23 17:55:21 +000012120 if (doc->URL != NULL) {
12121 newDoc->URL = xmlStrdup(doc->URL);
12122 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012123 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12124 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012125 if (sax != NULL)
12126 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012127 ctxt->node_seq.maximum = 0;
12128 ctxt->node_seq.length = 0;
12129 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012130 xmlFreeParserCtxt(ctxt);
12131 newDoc->intSubset = NULL;
12132 newDoc->extSubset = NULL;
12133 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012134 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012135 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012136 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012137 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012138 ctxt->myDoc = doc;
12139 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012140
Daniel Veillard87a764e2001-06-20 17:41:10 +000012141 /*
12142 * Get the 4 first bytes and decode the charset
12143 * if enc != XML_CHAR_ENCODING_NONE
12144 * plug some encoding conversion routines.
12145 */
12146 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012147 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12148 start[0] = RAW;
12149 start[1] = NXT(1);
12150 start[2] = NXT(2);
12151 start[3] = NXT(3);
12152 enc = xmlDetectCharEncoding(start, 4);
12153 if (enc != XML_CHAR_ENCODING_NONE) {
12154 xmlSwitchEncoding(ctxt, enc);
12155 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012156 }
12157
Owen Taylor3473f882001-02-23 17:55:21 +000012158 /*
12159 * Parse a possible text declaration first
12160 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012161 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012162 xmlParseTextDecl(ctxt);
12163 }
12164
Owen Taylor3473f882001-02-23 17:55:21 +000012165 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012166 ctxt->depth = depth;
12167
12168 xmlParseContent(ctxt);
12169
Daniel Veillard561b7f82002-03-20 21:55:57 +000012170 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012171 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012172 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012173 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012174 }
12175 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012176 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012177 }
12178
12179 if (!ctxt->wellFormed) {
12180 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012181 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012182 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012183 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012184 } else {
12185 if (list != NULL) {
12186 xmlNodePtr cur;
12187
12188 /*
12189 * Return the newly created nodeset after unlinking it from
12190 * they pseudo parent.
12191 */
12192 cur = newDoc->children->children;
12193 *list = cur;
12194 while (cur != NULL) {
12195 cur->parent = NULL;
12196 cur = cur->next;
12197 }
12198 newDoc->children->children = NULL;
12199 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012200 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012201 }
12202 if (sax != NULL)
12203 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012204 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12205 oldctxt->node_seq.length = ctxt->node_seq.length;
12206 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012207 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012208 ctxt->node_seq.maximum = 0;
12209 ctxt->node_seq.length = 0;
12210 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012211 xmlFreeParserCtxt(ctxt);
12212 newDoc->intSubset = NULL;
12213 newDoc->extSubset = NULL;
12214 xmlFreeDoc(newDoc);
12215
12216 return(ret);
12217}
12218
Daniel Veillard81273902003-09-30 00:43:48 +000012219#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012220/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012221 * xmlParseExternalEntity:
12222 * @doc: the document the chunk pertains to
12223 * @sax: the SAX handler bloc (possibly NULL)
12224 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12225 * @depth: Used for loop detection, use 0
12226 * @URL: the URL for the entity to load
12227 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012228 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012229 *
12230 * Parse an external general entity
12231 * An external general parsed entity is well-formed if it matches the
12232 * production labeled extParsedEnt.
12233 *
12234 * [78] extParsedEnt ::= TextDecl? content
12235 *
12236 * Returns 0 if the entity is well formed, -1 in case of args problem and
12237 * the parser error code otherwise
12238 */
12239
12240int
12241xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012242 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012243 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012244 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012245}
12246
12247/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012248 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012249 * @doc: the document the chunk pertains to
12250 * @sax: the SAX handler bloc (possibly NULL)
12251 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12252 * @depth: Used for loop detection, use 0
12253 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012254 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012255 *
12256 * Parse a well-balanced chunk of an XML document
12257 * called by the parser
12258 * The allowed sequence for the Well Balanced Chunk is the one defined by
12259 * the content production in the XML grammar:
12260 *
12261 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12262 *
12263 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12264 * the parser error code otherwise
12265 */
12266
12267int
12268xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012269 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012270 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12271 depth, string, lst, 0 );
12272}
Daniel Veillard81273902003-09-30 00:43:48 +000012273#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012274
12275/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012276 * xmlParseBalancedChunkMemoryInternal:
12277 * @oldctxt: the existing parsing context
12278 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12279 * @user_data: the user data field for the parser context
12280 * @lst: the return value for the set of parsed nodes
12281 *
12282 *
12283 * Parse a well-balanced chunk of an XML document
12284 * called by the parser
12285 * The allowed sequence for the Well Balanced Chunk is the one defined by
12286 * the content production in the XML grammar:
12287 *
12288 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12289 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012290 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12291 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000012292 *
12293 * In case recover is set to 1, the nodelist will not be empty even if
12294 * the parsed chunk is not well balanced.
12295 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012296static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012297xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12298 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12299 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012300 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012301 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012302 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012303 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012304 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012305 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012306 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012307
Daniel Veillard8915c152008-08-26 13:05:34 +000012308 if (((oldctxt->depth > 20) || (oldctxt->nbentities >= 100000)) &&
12309 ((oldctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012310 return(XML_ERR_ENTITY_LOOP);
12311 }
12312
12313
12314 if (lst != NULL)
12315 *lst = NULL;
12316 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012317 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012318
12319 size = xmlStrlen(string);
12320
12321 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012322 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012323 if (user_data != NULL)
12324 ctxt->userData = user_data;
12325 else
12326 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012327 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12328 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012329 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12330 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12331 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012332
12333 oldsax = ctxt->sax;
12334 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012335 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012336 ctxt->replaceEntities = oldctxt->replaceEntities;
12337 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012338
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012339 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012340 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012341 newDoc = xmlNewDoc(BAD_CAST "1.0");
12342 if (newDoc == NULL) {
12343 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012344 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012345 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012346 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012347 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012348 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012349 newDoc->dict = ctxt->dict;
12350 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012351 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012352 } else {
12353 ctxt->myDoc = oldctxt->myDoc;
12354 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012355 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012356 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012357 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12358 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012359 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012360 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012361 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012362 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012363 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012364 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012365 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012366 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012367 ctxt->myDoc->children = NULL;
12368 ctxt->myDoc->last = NULL;
12369 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012370 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012371 ctxt->instate = XML_PARSER_CONTENT;
12372 ctxt->depth = oldctxt->depth + 1;
12373
Daniel Veillard328f48c2002-11-15 15:24:34 +000012374 ctxt->validate = 0;
12375 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012376 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12377 /*
12378 * ID/IDREF registration will be done in xmlValidateElement below
12379 */
12380 ctxt->loadsubset |= XML_SKIP_IDS;
12381 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012382 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012383 ctxt->attsDefault = oldctxt->attsDefault;
12384 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012385
Daniel Veillard68e9e742002-11-16 15:35:11 +000012386 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012387 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012388 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012389 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012390 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012391 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012392 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012393 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012394 }
12395
12396 if (!ctxt->wellFormed) {
12397 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012398 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012399 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012400 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012401 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012402 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012403 }
12404
William M. Brack7b9154b2003-09-27 19:23:50 +000012405 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012406 xmlNodePtr cur;
12407
12408 /*
12409 * Return the newly created nodeset after unlinking it from
12410 * they pseudo parent.
12411 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012412 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012413 *lst = cur;
12414 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012415#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012416 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12417 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12418 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012419 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12420 oldctxt->myDoc, cur);
12421 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012422#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012423 cur->parent = NULL;
12424 cur = cur->next;
12425 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012426 ctxt->myDoc->children->children = NULL;
12427 }
12428 if (ctxt->myDoc != NULL) {
12429 xmlFreeNode(ctxt->myDoc->children);
12430 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012431 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012432 }
12433
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012434 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012435 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012436 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012437 ctxt->attsDefault = NULL;
12438 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012439 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012440 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012441 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012442 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012443
12444 return(ret);
12445}
12446
Daniel Veillard29b17482004-08-16 00:39:03 +000012447/**
12448 * xmlParseInNodeContext:
12449 * @node: the context node
12450 * @data: the input string
12451 * @datalen: the input string length in bytes
12452 * @options: a combination of xmlParserOption
12453 * @lst: the return value for the set of parsed nodes
12454 *
12455 * Parse a well-balanced chunk of an XML document
12456 * within the context (DTD, namespaces, etc ...) of the given node.
12457 *
12458 * The allowed sequence for the data is a Well Balanced Chunk defined by
12459 * the content production in the XML grammar:
12460 *
12461 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12462 *
12463 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12464 * error code otherwise
12465 */
12466xmlParserErrors
12467xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12468 int options, xmlNodePtr *lst) {
12469#ifdef SAX2
12470 xmlParserCtxtPtr ctxt;
12471 xmlDocPtr doc = NULL;
12472 xmlNodePtr fake, cur;
12473 int nsnr = 0;
12474
12475 xmlParserErrors ret = XML_ERR_OK;
12476
12477 /*
12478 * check all input parameters, grab the document
12479 */
12480 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12481 return(XML_ERR_INTERNAL_ERROR);
12482 switch (node->type) {
12483 case XML_ELEMENT_NODE:
12484 case XML_ATTRIBUTE_NODE:
12485 case XML_TEXT_NODE:
12486 case XML_CDATA_SECTION_NODE:
12487 case XML_ENTITY_REF_NODE:
12488 case XML_PI_NODE:
12489 case XML_COMMENT_NODE:
12490 case XML_DOCUMENT_NODE:
12491 case XML_HTML_DOCUMENT_NODE:
12492 break;
12493 default:
12494 return(XML_ERR_INTERNAL_ERROR);
12495
12496 }
12497 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12498 (node->type != XML_DOCUMENT_NODE) &&
12499 (node->type != XML_HTML_DOCUMENT_NODE))
12500 node = node->parent;
12501 if (node == NULL)
12502 return(XML_ERR_INTERNAL_ERROR);
12503 if (node->type == XML_ELEMENT_NODE)
12504 doc = node->doc;
12505 else
12506 doc = (xmlDocPtr) node;
12507 if (doc == NULL)
12508 return(XML_ERR_INTERNAL_ERROR);
12509
12510 /*
12511 * allocate a context and set-up everything not related to the
12512 * node position in the tree
12513 */
12514 if (doc->type == XML_DOCUMENT_NODE)
12515 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12516#ifdef LIBXML_HTML_ENABLED
12517 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12518 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12519#endif
12520 else
12521 return(XML_ERR_INTERNAL_ERROR);
12522
12523 if (ctxt == NULL)
12524 return(XML_ERR_NO_MEMORY);
12525 fake = xmlNewComment(NULL);
12526 if (fake == NULL) {
12527 xmlFreeParserCtxt(ctxt);
12528 return(XML_ERR_NO_MEMORY);
12529 }
12530 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012531
12532 /*
12533 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12534 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12535 * we must wait until the last moment to free the original one.
12536 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012537 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012538 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012539 xmlDictFree(ctxt->dict);
12540 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012541 } else
12542 options |= XML_PARSE_NODICT;
12543
Daniel Veillard37334572008-07-31 08:20:02 +000012544 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012545 xmlDetectSAX2(ctxt);
12546 ctxt->myDoc = doc;
12547
12548 if (node->type == XML_ELEMENT_NODE) {
12549 nodePush(ctxt, node);
12550 /*
12551 * initialize the SAX2 namespaces stack
12552 */
12553 cur = node;
12554 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12555 xmlNsPtr ns = cur->nsDef;
12556 const xmlChar *iprefix, *ihref;
12557
12558 while (ns != NULL) {
12559 if (ctxt->dict) {
12560 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12561 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12562 } else {
12563 iprefix = ns->prefix;
12564 ihref = ns->href;
12565 }
12566
12567 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12568 nsPush(ctxt, iprefix, ihref);
12569 nsnr++;
12570 }
12571 ns = ns->next;
12572 }
12573 cur = cur->parent;
12574 }
12575 ctxt->instate = XML_PARSER_CONTENT;
12576 }
12577
12578 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12579 /*
12580 * ID/IDREF registration will be done in xmlValidateElement below
12581 */
12582 ctxt->loadsubset |= XML_SKIP_IDS;
12583 }
12584
Daniel Veillard499cc922006-01-18 17:22:35 +000012585#ifdef LIBXML_HTML_ENABLED
12586 if (doc->type == XML_HTML_DOCUMENT_NODE)
12587 __htmlParseContent(ctxt);
12588 else
12589#endif
12590 xmlParseContent(ctxt);
12591
Daniel Veillard29b17482004-08-16 00:39:03 +000012592 nsPop(ctxt, nsnr);
12593 if ((RAW == '<') && (NXT(1) == '/')) {
12594 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12595 } else if (RAW != 0) {
12596 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12597 }
12598 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12599 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12600 ctxt->wellFormed = 0;
12601 }
12602
12603 if (!ctxt->wellFormed) {
12604 if (ctxt->errNo == 0)
12605 ret = XML_ERR_INTERNAL_ERROR;
12606 else
12607 ret = (xmlParserErrors)ctxt->errNo;
12608 } else {
12609 ret = XML_ERR_OK;
12610 }
12611
12612 /*
12613 * Return the newly created nodeset after unlinking it from
12614 * the pseudo sibling.
12615 */
12616
12617 cur = fake->next;
12618 fake->next = NULL;
12619 node->last = fake;
12620
12621 if (cur != NULL) {
12622 cur->prev = NULL;
12623 }
12624
12625 *lst = cur;
12626
12627 while (cur != NULL) {
12628 cur->parent = NULL;
12629 cur = cur->next;
12630 }
12631
12632 xmlUnlinkNode(fake);
12633 xmlFreeNode(fake);
12634
12635
12636 if (ret != XML_ERR_OK) {
12637 xmlFreeNodeList(*lst);
12638 *lst = NULL;
12639 }
William M. Brackc3f81342004-10-03 01:22:44 +000012640
William M. Brackb7b54de2004-10-06 16:38:01 +000012641 if (doc->dict != NULL)
12642 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012643 xmlFreeParserCtxt(ctxt);
12644
12645 return(ret);
12646#else /* !SAX2 */
12647 return(XML_ERR_INTERNAL_ERROR);
12648#endif
12649}
12650
Daniel Veillard81273902003-09-30 00:43:48 +000012651#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012652/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012653 * xmlParseBalancedChunkMemoryRecover:
12654 * @doc: the document the chunk pertains to
12655 * @sax: the SAX handler bloc (possibly NULL)
12656 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12657 * @depth: Used for loop detection, use 0
12658 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12659 * @lst: the return value for the set of parsed nodes
12660 * @recover: return nodes even if the data is broken (use 0)
12661 *
12662 *
12663 * Parse a well-balanced chunk of an XML document
12664 * called by the parser
12665 * The allowed sequence for the Well Balanced Chunk is the one defined by
12666 * the content production in the XML grammar:
12667 *
12668 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12669 *
12670 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12671 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012672 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012673 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012674 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12675 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012676 */
12677int
12678xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012679 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012680 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012681 xmlParserCtxtPtr ctxt;
12682 xmlDocPtr newDoc;
12683 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012684 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012685 int size;
12686 int ret = 0;
12687
Daniel Veillard8915c152008-08-26 13:05:34 +000012688 if (depth > 20) {
Owen Taylor3473f882001-02-23 17:55:21 +000012689 return(XML_ERR_ENTITY_LOOP);
12690 }
12691
12692
Daniel Veillardcda96922001-08-21 10:56:31 +000012693 if (lst != NULL)
12694 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012695 if (string == NULL)
12696 return(-1);
12697
12698 size = xmlStrlen(string);
12699
12700 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12701 if (ctxt == NULL) return(-1);
12702 ctxt->userData = ctxt;
12703 if (sax != NULL) {
12704 oldsax = ctxt->sax;
12705 ctxt->sax = sax;
12706 if (user_data != NULL)
12707 ctxt->userData = user_data;
12708 }
12709 newDoc = xmlNewDoc(BAD_CAST "1.0");
12710 if (newDoc == NULL) {
12711 xmlFreeParserCtxt(ctxt);
12712 return(-1);
12713 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012714 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012715 if ((doc != NULL) && (doc->dict != NULL)) {
12716 xmlDictFree(ctxt->dict);
12717 ctxt->dict = doc->dict;
12718 xmlDictReference(ctxt->dict);
12719 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12720 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12721 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12722 ctxt->dictNames = 1;
12723 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012724 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012725 }
Owen Taylor3473f882001-02-23 17:55:21 +000012726 if (doc != NULL) {
12727 newDoc->intSubset = doc->intSubset;
12728 newDoc->extSubset = doc->extSubset;
12729 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012730 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12731 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012732 if (sax != NULL)
12733 ctxt->sax = oldsax;
12734 xmlFreeParserCtxt(ctxt);
12735 newDoc->intSubset = NULL;
12736 newDoc->extSubset = NULL;
12737 xmlFreeDoc(newDoc);
12738 return(-1);
12739 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012740 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12741 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012742 if (doc == NULL) {
12743 ctxt->myDoc = newDoc;
12744 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012745 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012746 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012747 /* Ensure that doc has XML spec namespace */
12748 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12749 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012750 }
12751 ctxt->instate = XML_PARSER_CONTENT;
12752 ctxt->depth = depth;
12753
12754 /*
12755 * Doing validity checking on chunk doesn't make sense
12756 */
12757 ctxt->validate = 0;
12758 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012759 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012760
Daniel Veillardb39bc392002-10-26 19:29:51 +000012761 if ( doc != NULL ){
12762 content = doc->children;
12763 doc->children = NULL;
12764 xmlParseContent(ctxt);
12765 doc->children = content;
12766 }
12767 else {
12768 xmlParseContent(ctxt);
12769 }
Owen Taylor3473f882001-02-23 17:55:21 +000012770 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012771 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012772 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012773 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012774 }
12775 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012776 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012777 }
12778
12779 if (!ctxt->wellFormed) {
12780 if (ctxt->errNo == 0)
12781 ret = 1;
12782 else
12783 ret = ctxt->errNo;
12784 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012785 ret = 0;
12786 }
12787
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012788 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12789 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012790
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012791 /*
12792 * Return the newly created nodeset after unlinking it from
12793 * they pseudo parent.
12794 */
12795 cur = newDoc->children->children;
12796 *lst = cur;
12797 while (cur != NULL) {
12798 xmlSetTreeDoc(cur, doc);
12799 cur->parent = NULL;
12800 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012801 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012802 newDoc->children->children = NULL;
12803 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012804
Owen Taylor3473f882001-02-23 17:55:21 +000012805 if (sax != NULL)
12806 ctxt->sax = oldsax;
12807 xmlFreeParserCtxt(ctxt);
12808 newDoc->intSubset = NULL;
12809 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012810 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012811 xmlFreeDoc(newDoc);
12812
12813 return(ret);
12814}
12815
12816/**
12817 * xmlSAXParseEntity:
12818 * @sax: the SAX handler block
12819 * @filename: the filename
12820 *
12821 * parse an XML external entity out of context and build a tree.
12822 * It use the given SAX function block to handle the parsing callback.
12823 * If sax is NULL, fallback to the default DOM tree building routines.
12824 *
12825 * [78] extParsedEnt ::= TextDecl? content
12826 *
12827 * This correspond to a "Well Balanced" chunk
12828 *
12829 * Returns the resulting document tree
12830 */
12831
12832xmlDocPtr
12833xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12834 xmlDocPtr ret;
12835 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012836
12837 ctxt = xmlCreateFileParserCtxt(filename);
12838 if (ctxt == NULL) {
12839 return(NULL);
12840 }
12841 if (sax != NULL) {
12842 if (ctxt->sax != NULL)
12843 xmlFree(ctxt->sax);
12844 ctxt->sax = sax;
12845 ctxt->userData = NULL;
12846 }
12847
Owen Taylor3473f882001-02-23 17:55:21 +000012848 xmlParseExtParsedEnt(ctxt);
12849
12850 if (ctxt->wellFormed)
12851 ret = ctxt->myDoc;
12852 else {
12853 ret = NULL;
12854 xmlFreeDoc(ctxt->myDoc);
12855 ctxt->myDoc = NULL;
12856 }
12857 if (sax != NULL)
12858 ctxt->sax = NULL;
12859 xmlFreeParserCtxt(ctxt);
12860
12861 return(ret);
12862}
12863
12864/**
12865 * xmlParseEntity:
12866 * @filename: the filename
12867 *
12868 * parse an XML external entity out of context and build a tree.
12869 *
12870 * [78] extParsedEnt ::= TextDecl? content
12871 *
12872 * This correspond to a "Well Balanced" chunk
12873 *
12874 * Returns the resulting document tree
12875 */
12876
12877xmlDocPtr
12878xmlParseEntity(const char *filename) {
12879 return(xmlSAXParseEntity(NULL, filename));
12880}
Daniel Veillard81273902003-09-30 00:43:48 +000012881#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012882
12883/**
12884 * xmlCreateEntityParserCtxt:
12885 * @URL: the entity URL
12886 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012887 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012888 *
12889 * Create a parser context for an external entity
12890 * Automatic support for ZLIB/Compress compressed document is provided
12891 * by default if found at compile-time.
12892 *
12893 * Returns the new parser context or NULL
12894 */
12895xmlParserCtxtPtr
12896xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12897 const xmlChar *base) {
12898 xmlParserCtxtPtr ctxt;
12899 xmlParserInputPtr inputStream;
12900 char *directory = NULL;
12901 xmlChar *uri;
12902
12903 ctxt = xmlNewParserCtxt();
12904 if (ctxt == NULL) {
12905 return(NULL);
12906 }
12907
12908 uri = xmlBuildURI(URL, base);
12909
12910 if (uri == NULL) {
12911 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12912 if (inputStream == NULL) {
12913 xmlFreeParserCtxt(ctxt);
12914 return(NULL);
12915 }
12916
12917 inputPush(ctxt, inputStream);
12918
12919 if ((ctxt->directory == NULL) && (directory == NULL))
12920 directory = xmlParserGetDirectory((char *)URL);
12921 if ((ctxt->directory == NULL) && (directory != NULL))
12922 ctxt->directory = directory;
12923 } else {
12924 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12925 if (inputStream == NULL) {
12926 xmlFree(uri);
12927 xmlFreeParserCtxt(ctxt);
12928 return(NULL);
12929 }
12930
12931 inputPush(ctxt, inputStream);
12932
12933 if ((ctxt->directory == NULL) && (directory == NULL))
12934 directory = xmlParserGetDirectory((char *)uri);
12935 if ((ctxt->directory == NULL) && (directory != NULL))
12936 ctxt->directory = directory;
12937 xmlFree(uri);
12938 }
Owen Taylor3473f882001-02-23 17:55:21 +000012939 return(ctxt);
12940}
12941
12942/************************************************************************
12943 * *
12944 * Front ends when parsing from a file *
12945 * *
12946 ************************************************************************/
12947
12948/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012949 * xmlCreateURLParserCtxt:
12950 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012951 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012952 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012953 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012954 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012955 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012956 *
12957 * Returns the new parser context or NULL
12958 */
12959xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012960xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012961{
12962 xmlParserCtxtPtr ctxt;
12963 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012964 char *directory = NULL;
12965
Owen Taylor3473f882001-02-23 17:55:21 +000012966 ctxt = xmlNewParserCtxt();
12967 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012968 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012969 return(NULL);
12970 }
12971
Daniel Veillarddf292f72005-01-16 19:00:15 +000012972 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000012973 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000012974 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000012975
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012976 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012977 if (inputStream == NULL) {
12978 xmlFreeParserCtxt(ctxt);
12979 return(NULL);
12980 }
12981
Owen Taylor3473f882001-02-23 17:55:21 +000012982 inputPush(ctxt, inputStream);
12983 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012984 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012985 if ((ctxt->directory == NULL) && (directory != NULL))
12986 ctxt->directory = directory;
12987
12988 return(ctxt);
12989}
12990
Daniel Veillard61b93382003-11-03 14:28:31 +000012991/**
12992 * xmlCreateFileParserCtxt:
12993 * @filename: the filename
12994 *
12995 * Create a parser context for a file content.
12996 * Automatic support for ZLIB/Compress compressed document is provided
12997 * by default if found at compile-time.
12998 *
12999 * Returns the new parser context or NULL
13000 */
13001xmlParserCtxtPtr
13002xmlCreateFileParserCtxt(const char *filename)
13003{
13004 return(xmlCreateURLParserCtxt(filename, 0));
13005}
13006
Daniel Veillard81273902003-09-30 00:43:48 +000013007#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013008/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013009 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013010 * @sax: the SAX handler block
13011 * @filename: the filename
13012 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13013 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013014 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013015 *
13016 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13017 * compressed document is provided by default if found at compile-time.
13018 * It use the given SAX function block to handle the parsing callback.
13019 * If sax is NULL, fallback to the default DOM tree building routines.
13020 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013021 * User data (void *) is stored within the parser context in the
13022 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013023 *
Owen Taylor3473f882001-02-23 17:55:21 +000013024 * Returns the resulting document tree
13025 */
13026
13027xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013028xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13029 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013030 xmlDocPtr ret;
13031 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013032
Daniel Veillard635ef722001-10-29 11:48:19 +000013033 xmlInitParser();
13034
Owen Taylor3473f882001-02-23 17:55:21 +000013035 ctxt = xmlCreateFileParserCtxt(filename);
13036 if (ctxt == NULL) {
13037 return(NULL);
13038 }
13039 if (sax != NULL) {
13040 if (ctxt->sax != NULL)
13041 xmlFree(ctxt->sax);
13042 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013043 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013044 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013045 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013046 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013047 }
Owen Taylor3473f882001-02-23 17:55:21 +000013048
Daniel Veillard37d2d162008-03-14 10:54:00 +000013049 if (ctxt->directory == NULL)
13050 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013051
Daniel Veillarddad3f682002-11-17 16:47:27 +000013052 ctxt->recovery = recovery;
13053
Owen Taylor3473f882001-02-23 17:55:21 +000013054 xmlParseDocument(ctxt);
13055
William M. Brackc07329e2003-09-08 01:57:30 +000013056 if ((ctxt->wellFormed) || recovery) {
13057 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013058 if (ret != NULL) {
13059 if (ctxt->input->buf->compressed > 0)
13060 ret->compression = 9;
13061 else
13062 ret->compression = ctxt->input->buf->compressed;
13063 }
William M. Brackc07329e2003-09-08 01:57:30 +000013064 }
Owen Taylor3473f882001-02-23 17:55:21 +000013065 else {
13066 ret = NULL;
13067 xmlFreeDoc(ctxt->myDoc);
13068 ctxt->myDoc = NULL;
13069 }
13070 if (sax != NULL)
13071 ctxt->sax = NULL;
13072 xmlFreeParserCtxt(ctxt);
13073
13074 return(ret);
13075}
13076
13077/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013078 * xmlSAXParseFile:
13079 * @sax: the SAX handler block
13080 * @filename: the filename
13081 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13082 * documents
13083 *
13084 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13085 * compressed document is provided by default if found at compile-time.
13086 * It use the given SAX function block to handle the parsing callback.
13087 * If sax is NULL, fallback to the default DOM tree building routines.
13088 *
13089 * Returns the resulting document tree
13090 */
13091
13092xmlDocPtr
13093xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13094 int recovery) {
13095 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13096}
13097
13098/**
Owen Taylor3473f882001-02-23 17:55:21 +000013099 * xmlRecoverDoc:
13100 * @cur: a pointer to an array of xmlChar
13101 *
13102 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013103 * In the case the document is not Well Formed, a attempt to build a
13104 * tree is tried anyway
13105 *
13106 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013107 */
13108
13109xmlDocPtr
13110xmlRecoverDoc(xmlChar *cur) {
13111 return(xmlSAXParseDoc(NULL, cur, 1));
13112}
13113
13114/**
13115 * xmlParseFile:
13116 * @filename: the filename
13117 *
13118 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13119 * compressed document is provided by default if found at compile-time.
13120 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013121 * Returns the resulting document tree if the file was wellformed,
13122 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013123 */
13124
13125xmlDocPtr
13126xmlParseFile(const char *filename) {
13127 return(xmlSAXParseFile(NULL, filename, 0));
13128}
13129
13130/**
13131 * xmlRecoverFile:
13132 * @filename: the filename
13133 *
13134 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13135 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013136 * In the case the document is not Well Formed, it attempts to build
13137 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013138 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013139 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013140 */
13141
13142xmlDocPtr
13143xmlRecoverFile(const char *filename) {
13144 return(xmlSAXParseFile(NULL, filename, 1));
13145}
13146
13147
13148/**
13149 * xmlSetupParserForBuffer:
13150 * @ctxt: an XML parser context
13151 * @buffer: a xmlChar * buffer
13152 * @filename: a file name
13153 *
13154 * Setup the parser context to parse a new buffer; Clears any prior
13155 * contents from the parser context. The buffer parameter must not be
13156 * NULL, but the filename parameter can be
13157 */
13158void
13159xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13160 const char* filename)
13161{
13162 xmlParserInputPtr input;
13163
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013164 if ((ctxt == NULL) || (buffer == NULL))
13165 return;
13166
Owen Taylor3473f882001-02-23 17:55:21 +000013167 input = xmlNewInputStream(ctxt);
13168 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013169 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013170 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013171 return;
13172 }
13173
13174 xmlClearParserCtxt(ctxt);
13175 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013176 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013177 input->base = buffer;
13178 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013179 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013180 inputPush(ctxt, input);
13181}
13182
13183/**
13184 * xmlSAXUserParseFile:
13185 * @sax: a SAX handler
13186 * @user_data: The user data returned on SAX callbacks
13187 * @filename: a file name
13188 *
13189 * parse an XML file and call the given SAX handler routines.
13190 * Automatic support for ZLIB/Compress compressed document is provided
13191 *
13192 * Returns 0 in case of success or a error number otherwise
13193 */
13194int
13195xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13196 const char *filename) {
13197 int ret = 0;
13198 xmlParserCtxtPtr ctxt;
13199
13200 ctxt = xmlCreateFileParserCtxt(filename);
13201 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013202 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013203 xmlFree(ctxt->sax);
13204 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013205 xmlDetectSAX2(ctxt);
13206
Owen Taylor3473f882001-02-23 17:55:21 +000013207 if (user_data != NULL)
13208 ctxt->userData = user_data;
13209
13210 xmlParseDocument(ctxt);
13211
13212 if (ctxt->wellFormed)
13213 ret = 0;
13214 else {
13215 if (ctxt->errNo != 0)
13216 ret = ctxt->errNo;
13217 else
13218 ret = -1;
13219 }
13220 if (sax != NULL)
13221 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013222 if (ctxt->myDoc != NULL) {
13223 xmlFreeDoc(ctxt->myDoc);
13224 ctxt->myDoc = NULL;
13225 }
Owen Taylor3473f882001-02-23 17:55:21 +000013226 xmlFreeParserCtxt(ctxt);
13227
13228 return ret;
13229}
Daniel Veillard81273902003-09-30 00:43:48 +000013230#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013231
13232/************************************************************************
13233 * *
13234 * Front ends when parsing from memory *
13235 * *
13236 ************************************************************************/
13237
13238/**
13239 * xmlCreateMemoryParserCtxt:
13240 * @buffer: a pointer to a char array
13241 * @size: the size of the array
13242 *
13243 * Create a parser context for an XML in-memory document.
13244 *
13245 * Returns the new parser context or NULL
13246 */
13247xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013248xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013249 xmlParserCtxtPtr ctxt;
13250 xmlParserInputPtr input;
13251 xmlParserInputBufferPtr buf;
13252
13253 if (buffer == NULL)
13254 return(NULL);
13255 if (size <= 0)
13256 return(NULL);
13257
13258 ctxt = xmlNewParserCtxt();
13259 if (ctxt == NULL)
13260 return(NULL);
13261
Daniel Veillard53350552003-09-18 13:35:51 +000013262 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013263 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013264 if (buf == NULL) {
13265 xmlFreeParserCtxt(ctxt);
13266 return(NULL);
13267 }
Owen Taylor3473f882001-02-23 17:55:21 +000013268
13269 input = xmlNewInputStream(ctxt);
13270 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013271 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013272 xmlFreeParserCtxt(ctxt);
13273 return(NULL);
13274 }
13275
13276 input->filename = NULL;
13277 input->buf = buf;
13278 input->base = input->buf->buffer->content;
13279 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013280 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013281
13282 inputPush(ctxt, input);
13283 return(ctxt);
13284}
13285
Daniel Veillard81273902003-09-30 00:43:48 +000013286#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013287/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013288 * xmlSAXParseMemoryWithData:
13289 * @sax: the SAX handler block
13290 * @buffer: an pointer to a char array
13291 * @size: the size of the array
13292 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13293 * documents
13294 * @data: the userdata
13295 *
13296 * parse an XML in-memory block and use the given SAX function block
13297 * to handle the parsing callback. If sax is NULL, fallback to the default
13298 * DOM tree building routines.
13299 *
13300 * User data (void *) is stored within the parser context in the
13301 * context's _private member, so it is available nearly everywhere in libxml
13302 *
13303 * Returns the resulting document tree
13304 */
13305
13306xmlDocPtr
13307xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13308 int size, int recovery, void *data) {
13309 xmlDocPtr ret;
13310 xmlParserCtxtPtr ctxt;
13311
13312 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13313 if (ctxt == NULL) return(NULL);
13314 if (sax != NULL) {
13315 if (ctxt->sax != NULL)
13316 xmlFree(ctxt->sax);
13317 ctxt->sax = sax;
13318 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013319 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013320 if (data!=NULL) {
13321 ctxt->_private=data;
13322 }
13323
Daniel Veillardadba5f12003-04-04 16:09:01 +000013324 ctxt->recovery = recovery;
13325
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013326 xmlParseDocument(ctxt);
13327
13328 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13329 else {
13330 ret = NULL;
13331 xmlFreeDoc(ctxt->myDoc);
13332 ctxt->myDoc = NULL;
13333 }
13334 if (sax != NULL)
13335 ctxt->sax = NULL;
13336 xmlFreeParserCtxt(ctxt);
13337
13338 return(ret);
13339}
13340
13341/**
Owen Taylor3473f882001-02-23 17:55:21 +000013342 * xmlSAXParseMemory:
13343 * @sax: the SAX handler block
13344 * @buffer: an pointer to a char array
13345 * @size: the size of the array
13346 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13347 * documents
13348 *
13349 * parse an XML in-memory block and use the given SAX function block
13350 * to handle the parsing callback. If sax is NULL, fallback to the default
13351 * DOM tree building routines.
13352 *
13353 * Returns the resulting document tree
13354 */
13355xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013356xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13357 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013358 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013359}
13360
13361/**
13362 * xmlParseMemory:
13363 * @buffer: an pointer to a char array
13364 * @size: the size of the array
13365 *
13366 * parse an XML in-memory block and build a tree.
13367 *
13368 * Returns the resulting document tree
13369 */
13370
Daniel Veillard50822cb2001-07-26 20:05:51 +000013371xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013372 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13373}
13374
13375/**
13376 * xmlRecoverMemory:
13377 * @buffer: an pointer to a char array
13378 * @size: the size of the array
13379 *
13380 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013381 * In the case the document is not Well Formed, an attempt to
13382 * build a tree is tried anyway
13383 *
13384 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013385 */
13386
Daniel Veillard50822cb2001-07-26 20:05:51 +000013387xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013388 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13389}
13390
13391/**
13392 * xmlSAXUserParseMemory:
13393 * @sax: a SAX handler
13394 * @user_data: The user data returned on SAX callbacks
13395 * @buffer: an in-memory XML document input
13396 * @size: the length of the XML document in bytes
13397 *
13398 * A better SAX parsing routine.
13399 * parse an XML in-memory buffer and call the given SAX handler routines.
13400 *
13401 * Returns 0 in case of success or a error number otherwise
13402 */
13403int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013404 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013405 int ret = 0;
13406 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013407
13408 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13409 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013410 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13411 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013412 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013413 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013414
Daniel Veillard30211a02001-04-26 09:33:18 +000013415 if (user_data != NULL)
13416 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013417
13418 xmlParseDocument(ctxt);
13419
13420 if (ctxt->wellFormed)
13421 ret = 0;
13422 else {
13423 if (ctxt->errNo != 0)
13424 ret = ctxt->errNo;
13425 else
13426 ret = -1;
13427 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013428 if (sax != NULL)
13429 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013430 if (ctxt->myDoc != NULL) {
13431 xmlFreeDoc(ctxt->myDoc);
13432 ctxt->myDoc = NULL;
13433 }
Owen Taylor3473f882001-02-23 17:55:21 +000013434 xmlFreeParserCtxt(ctxt);
13435
13436 return ret;
13437}
Daniel Veillard81273902003-09-30 00:43:48 +000013438#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013439
13440/**
13441 * xmlCreateDocParserCtxt:
13442 * @cur: a pointer to an array of xmlChar
13443 *
13444 * Creates a parser context for an XML in-memory document.
13445 *
13446 * Returns the new parser context or NULL
13447 */
13448xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013449xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013450 int len;
13451
13452 if (cur == NULL)
13453 return(NULL);
13454 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013455 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013456}
13457
Daniel Veillard81273902003-09-30 00:43:48 +000013458#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013459/**
13460 * xmlSAXParseDoc:
13461 * @sax: the SAX handler block
13462 * @cur: a pointer to an array of xmlChar
13463 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13464 * documents
13465 *
13466 * parse an XML in-memory document and build a tree.
13467 * It use the given SAX function block to handle the parsing callback.
13468 * If sax is NULL, fallback to the default DOM tree building routines.
13469 *
13470 * Returns the resulting document tree
13471 */
13472
13473xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013474xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013475 xmlDocPtr ret;
13476 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013477 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013478
Daniel Veillard38936062004-11-04 17:45:11 +000013479 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013480
13481
13482 ctxt = xmlCreateDocParserCtxt(cur);
13483 if (ctxt == NULL) return(NULL);
13484 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013485 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013486 ctxt->sax = sax;
13487 ctxt->userData = NULL;
13488 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013489 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013490
13491 xmlParseDocument(ctxt);
13492 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13493 else {
13494 ret = NULL;
13495 xmlFreeDoc(ctxt->myDoc);
13496 ctxt->myDoc = NULL;
13497 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013498 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013499 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013500 xmlFreeParserCtxt(ctxt);
13501
13502 return(ret);
13503}
13504
13505/**
13506 * xmlParseDoc:
13507 * @cur: a pointer to an array of xmlChar
13508 *
13509 * parse an XML in-memory document and build a tree.
13510 *
13511 * Returns the resulting document tree
13512 */
13513
13514xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013515xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013516 return(xmlSAXParseDoc(NULL, cur, 0));
13517}
Daniel Veillard81273902003-09-30 00:43:48 +000013518#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013519
Daniel Veillard81273902003-09-30 00:43:48 +000013520#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013521/************************************************************************
13522 * *
13523 * Specific function to keep track of entities references *
13524 * and used by the XSLT debugger *
13525 * *
13526 ************************************************************************/
13527
13528static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13529
13530/**
13531 * xmlAddEntityReference:
13532 * @ent : A valid entity
13533 * @firstNode : A valid first node for children of entity
13534 * @lastNode : A valid last node of children entity
13535 *
13536 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13537 */
13538static void
13539xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13540 xmlNodePtr lastNode)
13541{
13542 if (xmlEntityRefFunc != NULL) {
13543 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13544 }
13545}
13546
13547
13548/**
13549 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013550 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013551 *
13552 * Set the function to call call back when a xml reference has been made
13553 */
13554void
13555xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13556{
13557 xmlEntityRefFunc = func;
13558}
Daniel Veillard81273902003-09-30 00:43:48 +000013559#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013560
13561/************************************************************************
13562 * *
13563 * Miscellaneous *
13564 * *
13565 ************************************************************************/
13566
13567#ifdef LIBXML_XPATH_ENABLED
13568#include <libxml/xpath.h>
13569#endif
13570
Daniel Veillardffa3c742005-07-21 13:24:09 +000013571extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013572static int xmlParserInitialized = 0;
13573
13574/**
13575 * xmlInitParser:
13576 *
13577 * Initialization function for the XML parser.
13578 * This is not reentrant. Call once before processing in case of
13579 * use in multithreaded programs.
13580 */
13581
13582void
13583xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013584 if (xmlParserInitialized != 0)
13585 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013586
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013587#ifdef LIBXML_THREAD_ENABLED
13588 __xmlGlobalInitMutexLock();
13589 if (xmlParserInitialized == 0) {
13590#endif
13591 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13592 (xmlGenericError == NULL))
13593 initGenericErrorDefaultFunc(NULL);
13594 xmlInitGlobals();
13595 xmlInitThreads();
13596 xmlInitMemory();
13597 xmlInitCharEncodingHandlers();
13598 xmlDefaultSAXHandlerInit();
13599 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013600#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013601 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013602#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013603#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013604 htmlInitAutoClose();
13605 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013606#endif
13607#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013608 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013609#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013610 xmlParserInitialized = 1;
13611#ifdef LIBXML_THREAD_ENABLED
13612 }
13613 __xmlGlobalInitMutexUnlock();
13614#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013615}
13616
13617/**
13618 * xmlCleanupParser:
13619 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013620 * This function name is somewhat misleading. It does not clean up
13621 * parser state, it cleans up memory allocated by the library itself.
13622 * It is a cleanup function for the XML library. It tries to reclaim all
13623 * related global memory allocated for the library processing.
13624 * It doesn't deallocate any document related memory. One should
13625 * call xmlCleanupParser() only when the process has finished using
13626 * the library and all XML/HTML documents built with it.
13627 * See also xmlInitParser() which has the opposite function of preparing
13628 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013629 */
13630
13631void
13632xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013633 if (!xmlParserInitialized)
13634 return;
13635
Owen Taylor3473f882001-02-23 17:55:21 +000013636 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013637#ifdef LIBXML_CATALOG_ENABLED
13638 xmlCatalogCleanup();
13639#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013640 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013641 xmlCleanupInputCallbacks();
13642#ifdef LIBXML_OUTPUT_ENABLED
13643 xmlCleanupOutputCallbacks();
13644#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013645#ifdef LIBXML_SCHEMAS_ENABLED
13646 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013647 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013648#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013649 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013650 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013651 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013652 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013653 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013654}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013655
13656/************************************************************************
13657 * *
13658 * New set (2.6.0) of simpler and more flexible APIs *
13659 * *
13660 ************************************************************************/
13661
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; when it is NULL the string is always freed.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and can be used safely in an unbraced if/else. The caller
 * supplies the terminating semicolon. Note that @str is evaluated more
 * than once, so it must not have side effects.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
13673
13674/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013675 * xmlCtxtReset:
13676 * @ctxt: an XML parser context
13677 *
13678 * Reset a parser context
13679 */
13680void
13681xmlCtxtReset(xmlParserCtxtPtr ctxt)
13682{
13683 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013684 xmlDictPtr dict;
13685
13686 if (ctxt == NULL)
13687 return;
13688
13689 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013690
13691 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13692 xmlFreeInputStream(input);
13693 }
13694 ctxt->inputNr = 0;
13695 ctxt->input = NULL;
13696
13697 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013698 if (ctxt->spaceTab != NULL) {
13699 ctxt->spaceTab[0] = -1;
13700 ctxt->space = &ctxt->spaceTab[0];
13701 } else {
13702 ctxt->space = NULL;
13703 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013704
13705
13706 ctxt->nodeNr = 0;
13707 ctxt->node = NULL;
13708
13709 ctxt->nameNr = 0;
13710 ctxt->name = NULL;
13711
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013712 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013713 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013714 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013715 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013716 DICT_FREE(ctxt->directory);
13717 ctxt->directory = NULL;
13718 DICT_FREE(ctxt->extSubURI);
13719 ctxt->extSubURI = NULL;
13720 DICT_FREE(ctxt->extSubSystem);
13721 ctxt->extSubSystem = NULL;
13722 if (ctxt->myDoc != NULL)
13723 xmlFreeDoc(ctxt->myDoc);
13724 ctxt->myDoc = NULL;
13725
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013726 ctxt->standalone = -1;
13727 ctxt->hasExternalSubset = 0;
13728 ctxt->hasPErefs = 0;
13729 ctxt->html = 0;
13730 ctxt->external = 0;
13731 ctxt->instate = XML_PARSER_START;
13732 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013733
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013734 ctxt->wellFormed = 1;
13735 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013736 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013737 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013738#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013739 ctxt->vctxt.userData = ctxt;
13740 ctxt->vctxt.error = xmlParserValidityError;
13741 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013742#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013743 ctxt->record_info = 0;
13744 ctxt->nbChars = 0;
13745 ctxt->checkIndex = 0;
13746 ctxt->inSubset = 0;
13747 ctxt->errNo = XML_ERR_OK;
13748 ctxt->depth = 0;
13749 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13750 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013751 ctxt->nbentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013752 xmlInitNodeInfoSeq(&ctxt->node_seq);
13753
13754 if (ctxt->attsDefault != NULL) {
13755 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13756 ctxt->attsDefault = NULL;
13757 }
13758 if (ctxt->attsSpecial != NULL) {
13759 xmlHashFree(ctxt->attsSpecial, NULL);
13760 ctxt->attsSpecial = NULL;
13761 }
13762
Daniel Veillard4432df22003-09-28 18:58:27 +000013763#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013764 if (ctxt->catalogs != NULL)
13765 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013766#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013767 if (ctxt->lastError.code != XML_ERR_OK)
13768 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013769}
13770
13771/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013772 * xmlCtxtResetPush:
13773 * @ctxt: an XML parser context
13774 * @chunk: a pointer to an array of chars
13775 * @size: number of chars in the array
13776 * @filename: an optional file name or URI
13777 * @encoding: the document encoding, or NULL
13778 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013779 * Reset a push parser context
13780 *
13781 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013782 */
13783int
13784xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13785 int size, const char *filename, const char *encoding)
13786{
13787 xmlParserInputPtr inputStream;
13788 xmlParserInputBufferPtr buf;
13789 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13790
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013791 if (ctxt == NULL)
13792 return(1);
13793
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013794 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13795 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13796
13797 buf = xmlAllocParserInputBuffer(enc);
13798 if (buf == NULL)
13799 return(1);
13800
13801 if (ctxt == NULL) {
13802 xmlFreeParserInputBuffer(buf);
13803 return(1);
13804 }
13805
13806 xmlCtxtReset(ctxt);
13807
13808 if (ctxt->pushTab == NULL) {
13809 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13810 sizeof(xmlChar *));
13811 if (ctxt->pushTab == NULL) {
13812 xmlErrMemory(ctxt, NULL);
13813 xmlFreeParserInputBuffer(buf);
13814 return(1);
13815 }
13816 }
13817
13818 if (filename == NULL) {
13819 ctxt->directory = NULL;
13820 } else {
13821 ctxt->directory = xmlParserGetDirectory(filename);
13822 }
13823
13824 inputStream = xmlNewInputStream(ctxt);
13825 if (inputStream == NULL) {
13826 xmlFreeParserInputBuffer(buf);
13827 return(1);
13828 }
13829
13830 if (filename == NULL)
13831 inputStream->filename = NULL;
13832 else
13833 inputStream->filename = (char *)
13834 xmlCanonicPath((const xmlChar *) filename);
13835 inputStream->buf = buf;
13836 inputStream->base = inputStream->buf->buffer->content;
13837 inputStream->cur = inputStream->buf->buffer->content;
13838 inputStream->end =
13839 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13840
13841 inputPush(ctxt, inputStream);
13842
13843 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13844 (ctxt->input->buf != NULL)) {
13845 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13846 int cur = ctxt->input->cur - ctxt->input->base;
13847
13848 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13849
13850 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13851 ctxt->input->cur = ctxt->input->base + cur;
13852 ctxt->input->end =
13853 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13854 use];
13855#ifdef DEBUG_PUSH
13856 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13857#endif
13858 }
13859
13860 if (encoding != NULL) {
13861 xmlCharEncodingHandlerPtr hdlr;
13862
Daniel Veillard37334572008-07-31 08:20:02 +000013863 if (ctxt->encoding != NULL)
13864 xmlFree((xmlChar *) ctxt->encoding);
13865 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13866
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013867 hdlr = xmlFindCharEncodingHandler(encoding);
13868 if (hdlr != NULL) {
13869 xmlSwitchToEncoding(ctxt, hdlr);
13870 } else {
13871 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13872 "Unsupported encoding %s\n", BAD_CAST encoding);
13873 }
13874 } else if (enc != XML_CHAR_ENCODING_NONE) {
13875 xmlSwitchEncoding(ctxt, enc);
13876 }
13877
13878 return(0);
13879}
13880
Daniel Veillard37334572008-07-31 08:20:02 +000013881
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013882/**
Daniel Veillard37334572008-07-31 08:20:02 +000013883 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013884 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013885 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000013886 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013887 *
13888 * Applies the options to the parser context
13889 *
13890 * Returns 0 in case of success, the set of unknown or unimplemented options
13891 * in case of error.
13892 */
Daniel Veillard37334572008-07-31 08:20:02 +000013893static int
13894xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013895{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013896 if (ctxt == NULL)
13897 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000013898 if (encoding != NULL) {
13899 if (ctxt->encoding != NULL)
13900 xmlFree((xmlChar *) ctxt->encoding);
13901 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13902 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013903 if (options & XML_PARSE_RECOVER) {
13904 ctxt->recovery = 1;
13905 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013906 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013907 } else
13908 ctxt->recovery = 0;
13909 if (options & XML_PARSE_DTDLOAD) {
13910 ctxt->loadsubset = XML_DETECT_IDS;
13911 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013912 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013913 } else
13914 ctxt->loadsubset = 0;
13915 if (options & XML_PARSE_DTDATTR) {
13916 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13917 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013918 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013919 }
13920 if (options & XML_PARSE_NOENT) {
13921 ctxt->replaceEntities = 1;
13922 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13923 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013924 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013925 } else
13926 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013927 if (options & XML_PARSE_PEDANTIC) {
13928 ctxt->pedantic = 1;
13929 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013930 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013931 } else
13932 ctxt->pedantic = 0;
13933 if (options & XML_PARSE_NOBLANKS) {
13934 ctxt->keepBlanks = 0;
13935 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13936 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013937 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013938 } else
13939 ctxt->keepBlanks = 1;
13940 if (options & XML_PARSE_DTDVALID) {
13941 ctxt->validate = 1;
13942 if (options & XML_PARSE_NOWARNING)
13943 ctxt->vctxt.warning = NULL;
13944 if (options & XML_PARSE_NOERROR)
13945 ctxt->vctxt.error = NULL;
13946 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013947 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013948 } else
13949 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013950 if (options & XML_PARSE_NOWARNING) {
13951 ctxt->sax->warning = NULL;
13952 options -= XML_PARSE_NOWARNING;
13953 }
13954 if (options & XML_PARSE_NOERROR) {
13955 ctxt->sax->error = NULL;
13956 ctxt->sax->fatalError = NULL;
13957 options -= XML_PARSE_NOERROR;
13958 }
Daniel Veillard81273902003-09-30 00:43:48 +000013959#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013960 if (options & XML_PARSE_SAX1) {
13961 ctxt->sax->startElement = xmlSAX2StartElement;
13962 ctxt->sax->endElement = xmlSAX2EndElement;
13963 ctxt->sax->startElementNs = NULL;
13964 ctxt->sax->endElementNs = NULL;
13965 ctxt->sax->initialized = 1;
13966 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013967 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013968 }
Daniel Veillard81273902003-09-30 00:43:48 +000013969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013970 if (options & XML_PARSE_NODICT) {
13971 ctxt->dictNames = 0;
13972 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013973 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013974 } else {
13975 ctxt->dictNames = 1;
13976 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013977 if (options & XML_PARSE_NOCDATA) {
13978 ctxt->sax->cdataBlock = NULL;
13979 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013980 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013981 }
13982 if (options & XML_PARSE_NSCLEAN) {
13983 ctxt->options |= XML_PARSE_NSCLEAN;
13984 options -= XML_PARSE_NSCLEAN;
13985 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013986 if (options & XML_PARSE_NONET) {
13987 ctxt->options |= XML_PARSE_NONET;
13988 options -= XML_PARSE_NONET;
13989 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013990 if (options & XML_PARSE_COMPACT) {
13991 ctxt->options |= XML_PARSE_COMPACT;
13992 options -= XML_PARSE_COMPACT;
13993 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000013994 if (options & XML_PARSE_OLD10) {
13995 ctxt->options |= XML_PARSE_OLD10;
13996 options -= XML_PARSE_OLD10;
13997 }
Daniel Veillard8915c152008-08-26 13:05:34 +000013998 if (options & XML_PARSE_NOBASEFIX) {
13999 ctxt->options |= XML_PARSE_NOBASEFIX;
14000 options -= XML_PARSE_NOBASEFIX;
14001 }
14002 if (options & XML_PARSE_HUGE) {
14003 ctxt->options |= XML_PARSE_HUGE;
14004 options -= XML_PARSE_HUGE;
14005 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014006 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014007 return (options);
14008}
14009
14010/**
Daniel Veillard37334572008-07-31 08:20:02 +000014011 * xmlCtxtUseOptions:
14012 * @ctxt: an XML parser context
14013 * @options: a combination of xmlParserOption
14014 *
14015 * Applies the options to the parser context
14016 *
14017 * Returns 0 in case of success, the set of unknown or unimplemented options
14018 * in case of error.
14019 */
14020int
14021xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14022{
14023 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14024}
14025
14026/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014027 * xmlDoRead:
14028 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014029 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014030 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014031 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014032 * @reuse: keep the context for reuse
14033 *
14034 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014035 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014036 * Returns the resulting document tree or NULL
14037 */
14038static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014039xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14040 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014041{
14042 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014043
14044 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014045 if (encoding != NULL) {
14046 xmlCharEncodingHandlerPtr hdlr;
14047
14048 hdlr = xmlFindCharEncodingHandler(encoding);
14049 if (hdlr != NULL)
14050 xmlSwitchToEncoding(ctxt, hdlr);
14051 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014052 if ((URL != NULL) && (ctxt->input != NULL) &&
14053 (ctxt->input->filename == NULL))
14054 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014055 xmlParseDocument(ctxt);
14056 if ((ctxt->wellFormed) || ctxt->recovery)
14057 ret = ctxt->myDoc;
14058 else {
14059 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014060 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014061 xmlFreeDoc(ctxt->myDoc);
14062 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014063 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014064 ctxt->myDoc = NULL;
14065 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014066 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014067 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014068
14069 return (ret);
14070}
14071
14072/**
14073 * xmlReadDoc:
14074 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014075 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014076 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014077 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014078 *
14079 * parse an XML in-memory document and build a tree.
14080 *
14081 * Returns the resulting document tree
14082 */
14083xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014084xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014085{
14086 xmlParserCtxtPtr ctxt;
14087
14088 if (cur == NULL)
14089 return (NULL);
14090
14091 ctxt = xmlCreateDocParserCtxt(cur);
14092 if (ctxt == NULL)
14093 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014094 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014095}
14096
14097/**
14098 * xmlReadFile:
14099 * @filename: a file or URL
14100 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014101 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014102 *
14103 * parse an XML file from the filesystem or the network.
14104 *
14105 * Returns the resulting document tree
14106 */
14107xmlDocPtr
14108xmlReadFile(const char *filename, const char *encoding, int options)
14109{
14110 xmlParserCtxtPtr ctxt;
14111
Daniel Veillard61b93382003-11-03 14:28:31 +000014112 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014113 if (ctxt == NULL)
14114 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014115 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014116}
14117
14118/**
14119 * xmlReadMemory:
14120 * @buffer: a pointer to a char array
14121 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014122 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014123 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014124 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014125 *
14126 * parse an XML in-memory document and build a tree.
14127 *
14128 * Returns the resulting document tree
14129 */
14130xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014131xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014132{
14133 xmlParserCtxtPtr ctxt;
14134
14135 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14136 if (ctxt == NULL)
14137 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014138 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014139}
14140
14141/**
14142 * xmlReadFd:
14143 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014144 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014145 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014146 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014147 *
14148 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014149 * NOTE that the file descriptor will not be closed when the
14150 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014151 *
14152 * Returns the resulting document tree
14153 */
14154xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014155xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014156{
14157 xmlParserCtxtPtr ctxt;
14158 xmlParserInputBufferPtr input;
14159 xmlParserInputPtr stream;
14160
14161 if (fd < 0)
14162 return (NULL);
14163
14164 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14165 if (input == NULL)
14166 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014167 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014168 ctxt = xmlNewParserCtxt();
14169 if (ctxt == NULL) {
14170 xmlFreeParserInputBuffer(input);
14171 return (NULL);
14172 }
14173 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14174 if (stream == NULL) {
14175 xmlFreeParserInputBuffer(input);
14176 xmlFreeParserCtxt(ctxt);
14177 return (NULL);
14178 }
14179 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014180 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014181}
14182
14183/**
14184 * xmlReadIO:
14185 * @ioread: an I/O read function
14186 * @ioclose: an I/O close function
14187 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014188 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014189 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014190 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014191 *
14192 * parse an XML document from I/O functions and source and build a tree.
14193 *
14194 * Returns the resulting document tree
14195 */
14196xmlDocPtr
14197xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014198 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014199{
14200 xmlParserCtxtPtr ctxt;
14201 xmlParserInputBufferPtr input;
14202 xmlParserInputPtr stream;
14203
14204 if (ioread == NULL)
14205 return (NULL);
14206
14207 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14208 XML_CHAR_ENCODING_NONE);
14209 if (input == NULL)
14210 return (NULL);
14211 ctxt = xmlNewParserCtxt();
14212 if (ctxt == NULL) {
14213 xmlFreeParserInputBuffer(input);
14214 return (NULL);
14215 }
14216 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14217 if (stream == NULL) {
14218 xmlFreeParserInputBuffer(input);
14219 xmlFreeParserCtxt(ctxt);
14220 return (NULL);
14221 }
14222 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014223 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014224}
14225
14226/**
14227 * xmlCtxtReadDoc:
14228 * @ctxt: an XML parser context
14229 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014230 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014231 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014232 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014233 *
14234 * parse an XML in-memory document and build a tree.
14235 * This reuses the existing @ctxt parser context
14236 *
14237 * Returns the resulting document tree
14238 */
14239xmlDocPtr
14240xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014241 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014242{
14243 xmlParserInputPtr stream;
14244
14245 if (cur == NULL)
14246 return (NULL);
14247 if (ctxt == NULL)
14248 return (NULL);
14249
14250 xmlCtxtReset(ctxt);
14251
14252 stream = xmlNewStringInputStream(ctxt, cur);
14253 if (stream == NULL) {
14254 return (NULL);
14255 }
14256 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014257 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014258}
14259
14260/**
14261 * xmlCtxtReadFile:
14262 * @ctxt: an XML parser context
14263 * @filename: a file or URL
14264 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014265 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014266 *
14267 * parse an XML file from the filesystem or the network.
14268 * This reuses the existing @ctxt parser context
14269 *
14270 * Returns the resulting document tree
14271 */
14272xmlDocPtr
14273xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14274 const char *encoding, int options)
14275{
14276 xmlParserInputPtr stream;
14277
14278 if (filename == NULL)
14279 return (NULL);
14280 if (ctxt == NULL)
14281 return (NULL);
14282
14283 xmlCtxtReset(ctxt);
14284
Daniel Veillard29614c72004-11-26 10:47:26 +000014285 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014286 if (stream == NULL) {
14287 return (NULL);
14288 }
14289 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014290 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014291}
14292
14293/**
14294 * xmlCtxtReadMemory:
14295 * @ctxt: an XML parser context
14296 * @buffer: a pointer to a char array
14297 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014298 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014299 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014300 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301 *
14302 * parse an XML in-memory document and build a tree.
14303 * This reuses the existing @ctxt parser context
14304 *
14305 * Returns the resulting document tree
14306 */
14307xmlDocPtr
14308xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014309 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014310{
14311 xmlParserInputBufferPtr input;
14312 xmlParserInputPtr stream;
14313
14314 if (ctxt == NULL)
14315 return (NULL);
14316 if (buffer == NULL)
14317 return (NULL);
14318
14319 xmlCtxtReset(ctxt);
14320
14321 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14322 if (input == NULL) {
14323 return(NULL);
14324 }
14325
14326 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14327 if (stream == NULL) {
14328 xmlFreeParserInputBuffer(input);
14329 return(NULL);
14330 }
14331
14332 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014333 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014334}
14335
14336/**
14337 * xmlCtxtReadFd:
14338 * @ctxt: an XML parser context
14339 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014340 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014341 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014342 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014343 *
14344 * parse an XML from a file descriptor and build a tree.
14345 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014346 * NOTE that the file descriptor will not be closed when the
14347 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014348 *
14349 * Returns the resulting document tree
14350 */
14351xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014352xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14353 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014354{
14355 xmlParserInputBufferPtr input;
14356 xmlParserInputPtr stream;
14357
14358 if (fd < 0)
14359 return (NULL);
14360 if (ctxt == NULL)
14361 return (NULL);
14362
14363 xmlCtxtReset(ctxt);
14364
14365
14366 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14367 if (input == NULL)
14368 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014369 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014370 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14371 if (stream == NULL) {
14372 xmlFreeParserInputBuffer(input);
14373 return (NULL);
14374 }
14375 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014376 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014377}
14378
14379/**
14380 * xmlCtxtReadIO:
14381 * @ctxt: an XML parser context
14382 * @ioread: an I/O read function
14383 * @ioclose: an I/O close function
14384 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014385 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014386 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014387 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014388 *
14389 * parse an XML document from I/O functions and source and build a tree.
14390 * This reuses the existing @ctxt parser context
14391 *
14392 * Returns the resulting document tree
14393 */
14394xmlDocPtr
14395xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14396 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014397 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014398 const char *encoding, int options)
14399{
14400 xmlParserInputBufferPtr input;
14401 xmlParserInputPtr stream;
14402
14403 if (ioread == NULL)
14404 return (NULL);
14405 if (ctxt == NULL)
14406 return (NULL);
14407
14408 xmlCtxtReset(ctxt);
14409
14410 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14411 XML_CHAR_ENCODING_NONE);
14412 if (input == NULL)
14413 return (NULL);
14414 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14415 if (stream == NULL) {
14416 xmlFreeParserInputBuffer(input);
14417 return (NULL);
14418 }
14419 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014420 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014421}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014422
14423#define bottom_parser
14424#include "elfgcchack.h"