blob: ce6e0039a64fd018140f1dfb7a0c4903bfd98407 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process. It is a safety boundary, not a limitation of the parser
 * itself. The default value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/*
 * NOTE(review): presumably selects the SAX2 code paths in this file;
 * the users of this macro are outside the visible part - confirm.
 */
#define SAX2 1

/*
 * Buffer sizes, in units decided by the code using them.
 * NOTE(review): the users are outside the visible part - confirm.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Fixed marker string, cast to the library string type with BAD_CAST. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL terminated list of the processing instruction targets starting
 * with "xml" which are allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
596 * Returns a non-zero value if the library was compiled with the
597 * feature, or zero (0) if the feature is not available or an
598 * unknown feature is requested.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
Daniel Veillard4432df22003-09-28 18:58:27 +0000951/**
952 * xmlCheckLanguageID:
953 * @lang: pointer to the string value
954 *
955 * Checks that the value conforms to the LanguageID production:
956 *
957 * NOTE: this is somewhat deprecated, those productions were removed from
958 * the XML Second edition.
959 *
960 * [33] LanguageID ::= Langcode ('-' Subcode)*
961 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
962 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
963 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
964 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
965 * [38] Subcode ::= ([a-z] | [A-Z])+
966 *
967 * Returns 1 if correct 0 otherwise
968 **/
969int
970xmlCheckLanguageID(const xmlChar * lang)
971{
972 const xmlChar *cur = lang;
973
974 if (cur == NULL)
975 return (0);
976 if (((cur[0] == 'i') && (cur[1] == '-')) ||
977 ((cur[0] == 'I') && (cur[1] == '-'))) {
978 /*
979 * IANA code
980 */
981 cur += 2;
982 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
983 ((cur[0] >= 'a') && (cur[0] <= 'z')))
984 cur++;
985 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
986 ((cur[0] == 'X') && (cur[1] == '-'))) {
987 /*
988 * User code
989 */
990 cur += 2;
991 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
992 ((cur[0] >= 'a') && (cur[0] <= 'z')))
993 cur++;
994 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
995 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
996 /*
997 * ISO639
998 */
999 cur++;
1000 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1001 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1002 cur++;
1003 else
1004 return (0);
1005 } else
1006 return (0);
1007 while (cur[0] != 0) { /* non input consuming */
1008 if (cur[0] != '-')
1009 return (0);
1010 cur++;
1011 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1012 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1013 cur++;
1014 else
1015 return (0);
1016 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1017 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1018 cur++;
1019 }
1020 return (1);
1021}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
1425/*
1426 * Macros for accessing the content. Those should be used only by the parser,
1427 * and not exported.
1428 *
1429 * Dirty macros, i.e. one often need to make assumption on the context to
1430 * use them
1431 *
1432 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1433 * To be used with extreme caution since operations consuming
1434 * characters may move the input buffer to a different location !
1435 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1436 * This should be used internally by the parser
1437 * only to compare to ASCII values otherwise it would break when
1438 * running with UTF-8 encoding.
1439 * RAW same as CUR but in the input buffer, bypass any token
1440 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
1443 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001444 * strings without newlines within the parser.
1445 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1446 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001447 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1448 *
1449 * NEXT Skip to the next character, this does the proper decoding
1450 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001451 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001452 * CUR_CHAR(l) returns the current unicode character (int), set l
1453 * to the number of xmlChars used for the encoding [0-5].
1454 * CUR_SCHAR same but operate on a string instead of the context
1455 * COPY_BUF copy the current unicode char to the target buffer, increment
1456 * the index
1457 * GROW, SHRINK handling of input buffers
1458 */
1459
/*
 * Direct accessors to the current input of the variable named ctxt at the
 * point of use.  RAW and CUR have the same expansion: the xmlChar under
 * the input cursor.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* NXT(val): the xmlChar @val positions after the cursor, unchecked */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the input cursor itself */
#define CUR_PTR ctxt->input->cur
1464
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s, read as
 * unsigned char, equal c1 ... cn in that order.  The comparison stops at
 * the first mismatch.  @s may be any pointer expression: it is fully
 * parenthesized before the cast, and the cast keeps the const qualifier.
 * @s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1482
Owen Taylor3473f882001-02-23 17:55:21 +00001483#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001484 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +00001485 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001486 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +00001487 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1488 xmlPopInput(ctxt); \
1489 } while (0)
1490
Daniel Veillard0b787f32004-03-26 17:29:53 +00001491#define SKIPL(val) do { \
1492 int skipl; \
1493 for(skipl=0; skipl<val; skipl++) { \
1494 if (*(ctxt->input->cur) == '\n') { \
1495 ctxt->input->line++; ctxt->input->col = 1; \
1496 } else ctxt->input->col++; \
1497 ctxt->nbChars++; \
1498 ctxt->input->cur++; \
1499 } \
1500 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1501 if ((*ctxt->input->cur == 0) && \
1502 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1503 xmlPopInput(ctxt); \
1504 } while (0)
1505
Daniel Veillarda880b122003-04-21 21:36:41 +00001506#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001507 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1508 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001509 xmlSHRINK (ctxt);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
Daniel Veillarda880b122003-04-21 21:36:41 +00001518#define GROW if ((ctxt->progressive == 0) && \
1519 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001520 xmlGROW (ctxt);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
1529#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1530
1531#define NEXT xmlNextChar(ctxt)
1532
Daniel Veillard21a0f912001-02-25 19:54:14 +00001533#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001534 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001535 ctxt->input->cur++; \
1536 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001537 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001538 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1539 }
1540
Owen Taylor3473f882001-02-23 17:55:21 +00001541#define NEXTL(l) do { \
1542 if (*(ctxt->input->cur) == '\n') { \
1543 ctxt->input->line++; ctxt->input->col = 1; \
1544 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001545 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001546 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001547 } while (0)
1548
1549#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1550#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1551
1552#define COPY_BUF(l,b,i,v) \
1553 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001554 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
2104/*
2105 * Macro used to grow the current buffer.
2106 */
2107#define growBuffer(buffer) { \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002108 xmlChar *tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002109 buffer##_size *= 2; \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002110 tmp = (xmlChar *) \
Owen Taylor3473f882001-02-23 17:55:21 +00002111 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002112 if (tmp == NULL) goto mem_error; \
2113 buffer = tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002114}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
2180 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2181 if (xmlParserDebugEntities)
2182 xmlGenericError(xmlGenericErrorContext,
2183 "String decoding Entity Reference: %.30s\n",
2184 str);
2185 ent = xmlParseStringEntityRef(ctxt, &str);
2186 if ((ent != NULL) &&
2187 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2188 if (ent->content != NULL) {
2189 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2190 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002191 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2192 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002193 }
2194 } else if ((ent != NULL) && (ent->content != NULL)) {
2195 xmlChar *rep;
2196
2197 ctxt->depth++;
2198 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2199 0, 0, 0);
2200 ctxt->depth--;
2201 if (rep != NULL) {
2202 current = rep;
2203 while (*current != 0) { /* non input consuming loop */
2204 buffer[nbchars++] = *current++;
2205 if (nbchars >
2206 buffer_size - XML_PARSER_BUFFER_SIZE) {
2207 growBuffer(buffer);
2208 }
2209 }
2210 xmlFree(rep);
2211 }
2212 } else if (ent != NULL) {
2213 int i = xmlStrlen(ent->name);
2214 const xmlChar *cur = ent->name;
2215
2216 buffer[nbchars++] = '&';
2217 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2218 growBuffer(buffer);
2219 }
2220 for (;i > 0;i--)
2221 buffer[nbchars++] = *cur++;
2222 buffer[nbchars++] = ';';
2223 }
2224 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2225 if (xmlParserDebugEntities)
2226 xmlGenericError(xmlGenericErrorContext,
2227 "String decoding PE Reference: %.30s\n", str);
2228 ent = xmlParseStringPEReference(ctxt, &str);
2229 if (ent != NULL) {
2230 xmlChar *rep;
2231
2232 ctxt->depth++;
2233 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2234 0, 0, 0);
2235 ctxt->depth--;
2236 if (rep != NULL) {
2237 current = rep;
2238 while (*current != 0) { /* non input consuming loop */
2239 buffer[nbchars++] = *current++;
2240 if (nbchars >
2241 buffer_size - XML_PARSER_BUFFER_SIZE) {
2242 growBuffer(buffer);
2243 }
2244 }
2245 xmlFree(rep);
2246 }
2247 }
2248 } else {
2249 COPY_BUF(l,buffer,nbchars,c);
2250 str += l;
2251 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2252 growBuffer(buffer);
2253 }
2254 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002255 if (str < last)
2256 c = CUR_SCHAR(str, l);
2257 else
2258 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002259 }
2260 buffer[nbchars++] = 0;
2261 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002262
2263mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002264 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002265 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002266}
2267
Daniel Veillarde57ec792003-09-10 10:50:59 +00002268/**
2269 * xmlStringDecodeEntities:
2270 * @ctxt: the parser context
2271 * @str: the input string
2272 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2273 * @end: an end marker xmlChar, 0 if none
2274 * @end2: an end marker xmlChar, 0 if none
2275 * @end3: an end marker xmlChar, 0 if none
2276 *
2277 * Takes a entity string content and process to do the adequate substitutions.
2278 *
2279 * [67] Reference ::= EntityRef | CharRef
2280 *
2281 * [69] PEReference ::= '%' Name ';'
2282 *
2283 * Returns A newly allocated string with the substitution done. The caller
2284 * must deallocate it !
2285 */
2286xmlChar *
2287xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2288 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002289 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002290 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2291 end, end2, end3));
2292}
Owen Taylor3473f882001-02-23 17:55:21 +00002293
2294/************************************************************************
2295 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002296 * Commodity functions, cleanup needed ? *
2297 * *
2298 ************************************************************************/
2299
2300/**
2301 * areBlanks:
2302 * @ctxt: an XML parser context
2303 * @str: a xmlChar *
2304 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002305 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002306 *
2307 * Is this a sequence of blank chars that one can ignore ?
2308 *
2309 * Returns 1 if ignorable 0 otherwise.
2310 */
2311
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002312static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2313 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002314 int i, ret;
2315 xmlNodePtr lastChild;
2316
Daniel Veillard05c13a22001-09-09 08:38:09 +00002317 /*
2318 * Don't spend time trying to differentiate them, the same callback is
2319 * used !
2320 */
2321 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002322 return(0);
2323
Owen Taylor3473f882001-02-23 17:55:21 +00002324 /*
2325 * Check for xml:space value.
2326 */
2327 if (*(ctxt->space) == 1)
2328 return(0);
2329
2330 /*
2331 * Check that the string is made of blanks
2332 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002333 if (blank_chars == 0) {
2334 for (i = 0;i < len;i++)
2335 if (!(IS_BLANK_CH(str[i]))) return(0);
2336 }
Owen Taylor3473f882001-02-23 17:55:21 +00002337
2338 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002339 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002340 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002341 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002342 if (ctxt->myDoc != NULL) {
2343 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2344 if (ret == 0) return(1);
2345 if (ret == 1) return(0);
2346 }
2347
2348 /*
2349 * Otherwise, heuristic :-\
2350 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002351 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002352 if ((ctxt->node->children == NULL) &&
2353 (RAW == '<') && (NXT(1) == '/')) return(0);
2354
2355 lastChild = xmlGetLastChild(ctxt->node);
2356 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002357 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2358 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002359 } else if (xmlNodeIsText(lastChild))
2360 return(0);
2361 else if ((ctxt->node->children != NULL) &&
2362 (xmlNodeIsText(ctxt->node->children)))
2363 return(0);
2364 return(1);
2365}
2366
Owen Taylor3473f882001-02-23 17:55:21 +00002367/************************************************************************
2368 * *
2369 * Extra stuff for namespace support *
2370 * Relates to http://www.w3.org/TR/WD-xml-names *
2371 * *
2372 ************************************************************************/
2373
2374/**
2375 * xmlSplitQName:
2376 * @ctxt: an XML parser context
2377 * @name: an XML parser context
2378 * @prefix: a xmlChar **
2379 *
2380 * parse an UTF8 encoded XML qualified name string
2381 *
2382 * [NS 5] QName ::= (Prefix ':')? LocalPart
2383 *
2384 * [NS 6] Prefix ::= NCName
2385 *
2386 * [NS 7] LocalPart ::= NCName
2387 *
2388 * Returns the local part, and prefix is updated
2389 * to get the Prefix if any.
2390 */
2391
2392xmlChar *
2393xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2394 xmlChar buf[XML_MAX_NAMELEN + 5];
2395 xmlChar *buffer = NULL;
2396 int len = 0;
2397 int max = XML_MAX_NAMELEN;
2398 xmlChar *ret = NULL;
2399 const xmlChar *cur = name;
2400 int c;
2401
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002402 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002403 *prefix = NULL;
2404
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002405 if (cur == NULL) return(NULL);
2406
Owen Taylor3473f882001-02-23 17:55:21 +00002407#ifndef XML_XML_NAMESPACE
2408 /* xml: prefix is not really a namespace */
2409 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2410 (cur[2] == 'l') && (cur[3] == ':'))
2411 return(xmlStrdup(name));
2412#endif
2413
Daniel Veillard597bc482003-07-24 16:08:28 +00002414 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002415 if (cur[0] == ':')
2416 return(xmlStrdup(name));
2417
2418 c = *cur++;
2419 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2420 buf[len++] = c;
2421 c = *cur++;
2422 }
2423 if (len >= max) {
2424 /*
2425 * Okay someone managed to make a huge name, so he's ready to pay
2426 * for the processing speed.
2427 */
2428 max = len * 2;
2429
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002430 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002431 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002432 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002433 return(NULL);
2434 }
2435 memcpy(buffer, buf, len);
2436 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2437 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002438 xmlChar *tmp;
2439
Owen Taylor3473f882001-02-23 17:55:21 +00002440 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002441 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002442 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002443 if (tmp == NULL) {
2444 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002445 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002446 return(NULL);
2447 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002448 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 }
2450 buffer[len++] = c;
2451 c = *cur++;
2452 }
2453 buffer[len] = 0;
2454 }
2455
Daniel Veillard597bc482003-07-24 16:08:28 +00002456 /* nasty but well=formed
2457 if ((c == ':') && (*cur == 0)) {
2458 return(xmlStrdup(name));
2459 } */
2460
Owen Taylor3473f882001-02-23 17:55:21 +00002461 if (buffer == NULL)
2462 ret = xmlStrndup(buf, len);
2463 else {
2464 ret = buffer;
2465 buffer = NULL;
2466 max = XML_MAX_NAMELEN;
2467 }
2468
2469
2470 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002471 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002472 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002473 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002474 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002475 }
Owen Taylor3473f882001-02-23 17:55:21 +00002476 len = 0;
2477
Daniel Veillardbb284f42002-10-16 18:02:47 +00002478 /*
2479 * Check that the first character is proper to start
2480 * a new name
2481 */
2482 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2483 ((c >= 0x41) && (c <= 0x5A)) ||
2484 (c == '_') || (c == ':'))) {
2485 int l;
2486 int first = CUR_SCHAR(cur, l);
2487
2488 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002489 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002490 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002491 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002492 }
2493 }
2494 cur++;
2495
Owen Taylor3473f882001-02-23 17:55:21 +00002496 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2497 buf[len++] = c;
2498 c = *cur++;
2499 }
2500 if (len >= max) {
2501 /*
2502 * Okay someone managed to make a huge name, so he's ready to pay
2503 * for the processing speed.
2504 */
2505 max = len * 2;
2506
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002507 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002508 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002509 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002510 return(NULL);
2511 }
2512 memcpy(buffer, buf, len);
2513 while (c != 0) { /* tested bigname2.xml */
2514 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002515 xmlChar *tmp;
2516
Owen Taylor3473f882001-02-23 17:55:21 +00002517 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002518 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002519 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002520 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002521 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002522 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002523 return(NULL);
2524 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002525 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002526 }
2527 buffer[len++] = c;
2528 c = *cur++;
2529 }
2530 buffer[len] = 0;
2531 }
2532
2533 if (buffer == NULL)
2534 ret = xmlStrndup(buf, len);
2535 else {
2536 ret = buffer;
2537 }
2538 }
2539
2540 return(ret);
2541}
2542
2543/************************************************************************
2544 * *
2545 * The parser itself *
2546 * Relates to http://www.w3.org/TR/REC-xml *
2547 * *
2548 ************************************************************************/
2549
/*
 * Forward declarations so that these static helpers can be called ahead
 * of their definitions.
 */
static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002553
Owen Taylor3473f882001-02-23 17:55:21 +00002554/**
2555 * xmlParseName:
2556 * @ctxt: an XML parser context
2557 *
2558 * parse an XML name.
2559 *
2560 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2561 * CombiningChar | Extender
2562 *
2563 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2564 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002565 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002566 *
2567 * Returns the Name parsed or NULL
2568 */
2569
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002570const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002571xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002572 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002573 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002574 int count = 0;
2575
2576 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002577
2578 /*
2579 * Accelerator for simple ASCII names
2580 */
2581 in = ctxt->input->cur;
2582 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2583 ((*in >= 0x41) && (*in <= 0x5A)) ||
2584 (*in == '_') || (*in == ':')) {
2585 in++;
2586 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2587 ((*in >= 0x41) && (*in <= 0x5A)) ||
2588 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002589 (*in == '_') || (*in == '-') ||
2590 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002591 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002592 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002593 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002594 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002595 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002596 ctxt->nbChars += count;
2597 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002598 if (ret == NULL)
2599 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002600 return(ret);
2601 }
2602 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002603 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002604}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002605
Daniel Veillard46de64e2002-05-29 08:21:33 +00002606/**
2607 * xmlParseNameAndCompare:
2608 * @ctxt: an XML parser context
2609 *
2610 * parse an XML name and compares for match
2611 * (specialized for endtag parsing)
2612 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002613 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2614 * and the name for mismatch
2615 */
2616
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002617static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002618xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002619 register const xmlChar *cmp = other;
2620 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002621 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002622
2623 GROW;
2624
2625 in = ctxt->input->cur;
2626 while (*in != 0 && *in == *cmp) {
2627 ++in;
2628 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002629 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002630 }
William M. Brack76e95df2003-10-18 16:20:14 +00002631 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002632 /* success */
2633 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002634 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002635 }
2636 /* failure (or end of input buffer), check with full function */
2637 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002638 /* strings coming from the dictionnary direct compare possible */
2639 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 return ret;
2643}
2644
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002645static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002646xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002647 int len = 0, l;
2648 int c;
2649 int count = 0;
2650
2651 /*
2652 * Handler for more complex cases
2653 */
2654 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002655 c = CUR_CHAR(l);
2656 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2657 (!IS_LETTER(c) && (c != '_') &&
2658 (c != ':'))) {
2659 return(NULL);
2660 }
2661
2662 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002663 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002664 (c == '.') || (c == '-') ||
2665 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002666 (IS_COMBINING(c)) ||
2667 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002668 if (count++ > 100) {
2669 count = 0;
2670 GROW;
2671 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002672 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002673 NEXTL(l);
2674 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002675 }
Daniel Veillard96688262005-08-23 18:14:12 +00002676 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2677 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002679}
2680
2681/**
2682 * xmlParseStringName:
2683 * @ctxt: an XML parser context
2684 * @str: a pointer to the string pointer (IN/OUT)
2685 *
2686 * parse an XML name.
2687 *
2688 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2689 * CombiningChar | Extender
2690 *
2691 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2692 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002693 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002694 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002695 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002696 * is updated to the current location in the string.
2697 */
2698
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002699static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002700xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2701 xmlChar buf[XML_MAX_NAMELEN + 5];
2702 const xmlChar *cur = *str;
2703 int len = 0, l;
2704 int c;
2705
2706 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002707 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002708 (c != ':')) {
2709 return(NULL);
2710 }
2711
William M. Brack871611b2003-10-18 04:53:14 +00002712 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002713 (c == '.') || (c == '-') ||
2714 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002715 (IS_COMBINING(c)) ||
2716 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002717 COPY_BUF(l,buf,len,c);
2718 cur += l;
2719 c = CUR_SCHAR(cur, l);
2720 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2721 /*
2722 * Okay someone managed to make a huge name, so he's ready to pay
2723 * for the processing speed.
2724 */
2725 xmlChar *buffer;
2726 int max = len * 2;
2727
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002728 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002729 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002730 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002731 return(NULL);
2732 }
2733 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002734 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002735 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002736 (c == '.') || (c == '-') ||
2737 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002738 (IS_COMBINING(c)) ||
2739 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002740 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002741 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002742 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002743 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002744 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002745 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002748 return(NULL);
2749 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002750 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002751 }
2752 COPY_BUF(l,buffer,len,c);
2753 cur += l;
2754 c = CUR_SCHAR(cur, l);
2755 }
2756 buffer[len] = 0;
2757 *str = cur;
2758 return(buffer);
2759 }
2760 }
2761 *str = cur;
2762 return(xmlStrndup(buf, len));
2763}
2764
2765/**
2766 * xmlParseNmtoken:
2767 * @ctxt: an XML parser context
2768 *
2769 * parse an XML Nmtoken.
2770 *
2771 * [7] Nmtoken ::= (NameChar)+
2772 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002773 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002774 *
2775 * Returns the Nmtoken parsed or NULL
2776 */
2777
2778xmlChar *
2779xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2780 xmlChar buf[XML_MAX_NAMELEN + 5];
2781 int len = 0, l;
2782 int c;
2783 int count = 0;
2784
2785 GROW;
2786 c = CUR_CHAR(l);
2787
William M. Brack871611b2003-10-18 04:53:14 +00002788 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002789 (c == '.') || (c == '-') ||
2790 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002791 (IS_COMBINING(c)) ||
2792 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002793 if (count++ > 100) {
2794 count = 0;
2795 GROW;
2796 }
2797 COPY_BUF(l,buf,len,c);
2798 NEXTL(l);
2799 c = CUR_CHAR(l);
2800 if (len >= XML_MAX_NAMELEN) {
2801 /*
2802 * Okay someone managed to make a huge token, so he's ready to pay
2803 * for the processing speed.
2804 */
2805 xmlChar *buffer;
2806 int max = len * 2;
2807
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002808 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002809 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(NULL);
2812 }
2813 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002814 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002815 (c == '.') || (c == '-') ||
2816 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002817 (IS_COMBINING(c)) ||
2818 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002819 if (count++ > 100) {
2820 count = 0;
2821 GROW;
2822 }
2823 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002824 xmlChar *tmp;
2825
Owen Taylor3473f882001-02-23 17:55:21 +00002826 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002827 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002828 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002829 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002830 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002831 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002832 return(NULL);
2833 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002834 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002835 }
2836 COPY_BUF(l,buffer,len,c);
2837 NEXTL(l);
2838 c = CUR_CHAR(l);
2839 }
2840 buffer[len] = 0;
2841 return(buffer);
2842 }
2843 }
2844 if (len == 0)
2845 return(NULL);
2846 return(xmlStrndup(buf, len));
2847}
2848
2849/**
2850 * xmlParseEntityValue:
2851 * @ctxt: an XML parser context
2852 * @orig: if non-NULL store a copy of the original entity value
2853 *
2854 * parse a value for ENTITY declarations
2855 *
2856 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2857 * "'" ([^%&'] | PEReference | Reference)* "'"
2858 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002859 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002860 */
2861
2862xmlChar *
2863xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2864 xmlChar *buf = NULL;
2865 int len = 0;
2866 int size = XML_PARSER_BUFFER_SIZE;
2867 int c, l;
2868 xmlChar stop;
2869 xmlChar *ret = NULL;
2870 const xmlChar *cur = NULL;
2871 xmlParserInputPtr input;
2872
2873 if (RAW == '"') stop = '"';
2874 else if (RAW == '\'') stop = '\'';
2875 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002879 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002880 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002881 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002882 return(NULL);
2883 }
2884
2885 /*
2886 * The content of the entity definition is copied in a buffer.
2887 */
2888
2889 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2890 input = ctxt->input;
2891 GROW;
2892 NEXT;
2893 c = CUR_CHAR(l);
2894 /*
2895 * NOTE: 4.4.5 Included in Literal
2896 * When a parameter entity reference appears in a literal entity
2897 * value, ... a single or double quote character in the replacement
2898 * text is always treated as a normal data character and will not
2899 * terminate the literal.
2900 * In practice it means we stop the loop only when back at parsing
2901 * the initial entity and the quote is found
2902 */
William M. Brack871611b2003-10-18 04:53:14 +00002903 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002904 (ctxt->input != input))) {
2905 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002906 xmlChar *tmp;
2907
Owen Taylor3473f882001-02-23 17:55:21 +00002908 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002909 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2910 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002911 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 return(NULL);
2914 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002916 }
2917 COPY_BUF(l,buf,len,c);
2918 NEXTL(l);
2919 /*
2920 * Pop-up of finished entities.
2921 */
2922 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2923 xmlPopInput(ctxt);
2924
2925 GROW;
2926 c = CUR_CHAR(l);
2927 if (c == 0) {
2928 GROW;
2929 c = CUR_CHAR(l);
2930 }
2931 }
2932 buf[len] = 0;
2933
2934 /*
2935 * Raise problem w.r.t. '&' and '%' being used in non-entities
2936 * reference constructs. Note Charref will be handled in
2937 * xmlStringDecodeEntities()
2938 */
2939 cur = buf;
2940 while (*cur != 0) { /* non input consuming */
2941 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2942 xmlChar *name;
2943 xmlChar tmp = *cur;
2944
2945 cur++;
2946 name = xmlParseStringName(ctxt, &cur);
2947 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002948 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002949 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002950 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002951 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002952 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2953 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002954 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002955 }
2956 if (name != NULL)
2957 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002958 if (*cur == 0)
2959 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002960 }
2961 cur++;
2962 }
2963
2964 /*
2965 * Then PEReference entities are substituted.
2966 */
2967 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002968 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002969 xmlFree(buf);
2970 } else {
2971 NEXT;
2972 /*
2973 * NOTE: 4.4.7 Bypassed
2974 * When a general entity reference appears in the EntityValue in
2975 * an entity declaration, it is bypassed and left as is.
2976 * so XML_SUBSTITUTE_REF is not set here.
2977 */
2978 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2979 0, 0, 0);
2980 if (orig != NULL)
2981 *orig = buf;
2982 else
2983 xmlFree(buf);
2984 }
2985
2986 return(ret);
2987}
2988
2989/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002990 * xmlParseAttValueComplex:
2991 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002992 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002993 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002994 *
2995 * parse a value for an attribute, this is the fallback function
2996 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002997 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002998 *
2999 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3000 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003001static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003002xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003003 xmlChar limit = 0;
3004 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003005 int len = 0;
3006 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003007 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003008 xmlChar *current = NULL;
3009 xmlEntityPtr ent;
3010
Owen Taylor3473f882001-02-23 17:55:21 +00003011 if (NXT(0) == '"') {
3012 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3013 limit = '"';
3014 NEXT;
3015 } else if (NXT(0) == '\'') {
3016 limit = '\'';
3017 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3018 NEXT;
3019 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003020 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003021 return(NULL);
3022 }
3023
3024 /*
3025 * allocate a translation buffer.
3026 */
3027 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003028 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003029 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003030
3031 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003032 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003033 */
3034 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003035 while ((NXT(0) != limit) && /* checked */
3036 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003037 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003038 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003039 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003040 if (NXT(1) == '#') {
3041 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003042
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003044 if (ctxt->replaceEntities) {
3045 if (len > buf_size - 10) {
3046 growBuffer(buf);
3047 }
3048 buf[len++] = '&';
3049 } else {
3050 /*
3051 * The reparsing will be done in xmlStringGetNodeList()
3052 * called by the attribute() function in SAX.c
3053 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003054 if (len > buf_size - 10) {
3055 growBuffer(buf);
3056 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003057 buf[len++] = '&';
3058 buf[len++] = '#';
3059 buf[len++] = '3';
3060 buf[len++] = '8';
3061 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003062 }
3063 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003064 if (len > buf_size - 10) {
3065 growBuffer(buf);
3066 }
Owen Taylor3473f882001-02-23 17:55:21 +00003067 len += xmlCopyChar(0, &buf[len], val);
3068 }
3069 } else {
3070 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003071 if ((ent != NULL) &&
3072 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3073 if (len > buf_size - 10) {
3074 growBuffer(buf);
3075 }
3076 if ((ctxt->replaceEntities == 0) &&
3077 (ent->content[0] == '&')) {
3078 buf[len++] = '&';
3079 buf[len++] = '#';
3080 buf[len++] = '3';
3081 buf[len++] = '8';
3082 buf[len++] = ';';
3083 } else {
3084 buf[len++] = ent->content[0];
3085 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003086 } else if ((ent != NULL) &&
3087 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003088 xmlChar *rep;
3089
3090 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3091 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 XML_SUBSTITUTE_REF,
3093 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003094 if (rep != NULL) {
3095 current = rep;
3096 while (*current != 0) { /* non input consuming */
3097 buf[len++] = *current++;
3098 if (len > buf_size - 10) {
3099 growBuffer(buf);
3100 }
3101 }
3102 xmlFree(rep);
3103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 if (ent->content != NULL)
3109 buf[len++] = ent->content[0];
3110 }
3111 } else if (ent != NULL) {
3112 int i = xmlStrlen(ent->name);
3113 const xmlChar *cur = ent->name;
3114
3115 /*
3116 * This may look absurd but is needed to detect
3117 * entities problems
3118 */
3119 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3120 (ent->content != NULL)) {
3121 xmlChar *rep;
3122 rep = xmlStringDecodeEntities(ctxt, ent->content,
3123 XML_SUBSTITUTE_REF, 0, 0, 0);
3124 if (rep != NULL)
3125 xmlFree(rep);
3126 }
3127
3128 /*
3129 * Just output the reference
3130 */
3131 buf[len++] = '&';
3132 if (len > buf_size - i - 10) {
3133 growBuffer(buf);
3134 }
3135 for (;i > 0;i--)
3136 buf[len++] = *cur++;
3137 buf[len++] = ';';
3138 }
3139 }
3140 } else {
3141 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003142 if ((len != 0) || (!normalize)) {
3143 if ((!normalize) || (!in_space)) {
3144 COPY_BUF(l,buf,len,0x20);
3145 if (len > buf_size - 10) {
3146 growBuffer(buf);
3147 }
3148 }
3149 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003150 }
3151 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003152 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003153 COPY_BUF(l,buf,len,c);
3154 if (len > buf_size - 10) {
3155 growBuffer(buf);
3156 }
3157 }
3158 NEXTL(l);
3159 }
3160 GROW;
3161 c = CUR_CHAR(l);
3162 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003163 if ((in_space) && (normalize)) {
3164 while (buf[len - 1] == 0x20) len--;
3165 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003166 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003167 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003168 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003169 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003170 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3171 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003172 } else
3173 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003174 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003175 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003176
3177mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003178 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003179 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003180}
3181
3182/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003183 * xmlParseAttValue:
3184 * @ctxt: an XML parser context
3185 *
3186 * parse a value for an attribute
3187 * Note: the parser won't do substitution of entities here, this
3188 * will be handled later in xmlStringGetNodeList
3189 *
3190 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3191 * "'" ([^<&'] | Reference)* "'"
3192 *
3193 * 3.3.3 Attribute-Value Normalization:
3194 * Before the value of an attribute is passed to the application or
3195 * checked for validity, the XML processor must normalize it as follows:
3196 * - a character reference is processed by appending the referenced
3197 * character to the attribute value
3198 * - an entity reference is processed by recursively processing the
3199 * replacement text of the entity
3200 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3201 * appending #x20 to the normalized value, except that only a single
3202 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3203 * parsed entity or the literal entity value of an internal parsed entity
3204 * - other characters are processed by appending them to the normalized value
3205 * If the declared value is not CDATA, then the XML processor must further
3206 * process the normalized attribute value by discarding any leading and
3207 * trailing space (#x20) characters, and by replacing sequences of space
3208 * (#x20) characters by a single space (#x20) character.
3209 * All attributes for which no declaration has been read should be treated
3210 * by a non-validating parser as if declared CDATA.
3211 *
3212 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3213 */
3214
3215
3216xmlChar *
3217xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003218 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003219 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003220}
3221
3222/**
Owen Taylor3473f882001-02-23 17:55:21 +00003223 * xmlParseSystemLiteral:
3224 * @ctxt: an XML parser context
3225 *
3226 * parse an XML Literal
3227 *
3228 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3229 *
3230 * Returns the SystemLiteral parsed or NULL
3231 */
3232
3233xmlChar *
3234xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3235 xmlChar *buf = NULL;
3236 int len = 0;
3237 int size = XML_PARSER_BUFFER_SIZE;
3238 int cur, l;
3239 xmlChar stop;
3240 int state = ctxt->instate;
3241 int count = 0;
3242
3243 SHRINK;
3244 if (RAW == '"') {
3245 NEXT;
3246 stop = '"';
3247 } else if (RAW == '\'') {
3248 NEXT;
3249 stop = '\'';
3250 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003251 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003252 return(NULL);
3253 }
3254
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003255 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003256 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3261 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003262 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003263 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003264 xmlChar *tmp;
3265
Owen Taylor3473f882001-02-23 17:55:21 +00003266 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3268 if (tmp == NULL) {
3269 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003270 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003271 ctxt->instate = (xmlParserInputState) state;
3272 return(NULL);
3273 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003274 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003275 }
3276 count++;
3277 if (count > 50) {
3278 GROW;
3279 count = 0;
3280 }
3281 COPY_BUF(l,buf,len,cur);
3282 NEXTL(l);
3283 cur = CUR_CHAR(l);
3284 if (cur == 0) {
3285 GROW;
3286 SHRINK;
3287 cur = CUR_CHAR(l);
3288 }
3289 }
3290 buf[len] = 0;
3291 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003292 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003293 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003294 } else {
3295 NEXT;
3296 }
3297 return(buf);
3298}
3299
3300/**
3301 * xmlParsePubidLiteral:
3302 * @ctxt: an XML parser context
3303 *
3304 * parse an XML public literal
3305 *
3306 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3307 *
3308 * Returns the PubidLiteral parsed or NULL.
3309 */
3310
3311xmlChar *
3312xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3313 xmlChar *buf = NULL;
3314 int len = 0;
3315 int size = XML_PARSER_BUFFER_SIZE;
3316 xmlChar cur;
3317 xmlChar stop;
3318 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003319 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003320
3321 SHRINK;
3322 if (RAW == '"') {
3323 NEXT;
3324 stop = '"';
3325 } else if (RAW == '\'') {
3326 NEXT;
3327 stop = '\'';
3328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003329 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003330 return(NULL);
3331 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003332 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003333 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 return(NULL);
3336 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003337 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003338 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003339 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003340 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003341 xmlChar *tmp;
3342
Owen Taylor3473f882001-02-23 17:55:21 +00003343 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003344 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3345 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003346 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003348 return(NULL);
3349 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003351 }
3352 buf[len++] = cur;
3353 count++;
3354 if (count > 50) {
3355 GROW;
3356 count = 0;
3357 }
3358 NEXT;
3359 cur = CUR;
3360 if (cur == 0) {
3361 GROW;
3362 SHRINK;
3363 cur = CUR;
3364 }
3365 }
3366 buf[len] = 0;
3367 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003368 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003369 } else {
3370 NEXT;
3371 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003372 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003373 return(buf);
3374}
3375
Daniel Veillard48b2f892001-02-25 16:11:03 +00003376void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003377
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanner: a non-zero entry means the byte can be skipped over as plain
 * character data.  Each accepted byte maps to itself; every other byte
 * maps to 0.  Accepted are 0x09 and 0x20..0x7F except '&', '<' and ']'.
 * CR (0x0D) and LF (0x0A) are deliberately rejected so the caller can
 * handle them separately, and so are all bytes >= 0x80 (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 only; CR/LF handled apart */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3415
Owen Taylor3473f882001-02-23 17:55:21 +00003416/**
3417 * xmlParseCharData:
3418 * @ctxt: an XML parser context
3419 * @cdata: int indicating whether we are within a CDATA section
3420 *
3421 * parse a CharData section.
3422 * if we are within a CDATA section ']]>' marks an end of section.
3423 *
3424 * The right angle bracket (>) may be represented using the string "&gt;",
3425 * and must, for compatibility, be escaped using "&gt;" or a character
3426 * reference when it appears in the string "]]>" in content, when that
3427 * string is not marking the end of a CDATA section.
3428 *
3429 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3430 */
3431
3432void
3433xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003434 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003435 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003436 int line = ctxt->input->line;
3437 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003438 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003439
3440 SHRINK;
3441 GROW;
3442 /*
3443 * Accelerated common case where input don't need to be
3444 * modified before passing it to the handler.
3445 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003446 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003447 in = ctxt->input->cur;
3448 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003449get_more_space:
3450 while (*in == 0x20) in++;
3451 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003452 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003453 in++;
3454 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003455 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003456 in++;
3457 }
3458 goto get_more_space;
3459 }
3460 if (*in == '<') {
3461 nbchar = in - ctxt->input->cur;
3462 if (nbchar > 0) {
3463 const xmlChar *tmp = ctxt->input->cur;
3464 ctxt->input->cur = in;
3465
Daniel Veillard34099b42004-11-04 17:34:35 +00003466 if ((ctxt->sax != NULL) &&
3467 (ctxt->sax->ignorableWhitespace !=
3468 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003469 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003470 if (ctxt->sax->ignorableWhitespace != NULL)
3471 ctxt->sax->ignorableWhitespace(ctxt->userData,
3472 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003473 } else if (ctxt->sax->characters != NULL)
3474 ctxt->sax->characters(ctxt->userData,
3475 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003476 } else if ((ctxt->sax != NULL) &&
3477 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003478 ctxt->sax->characters(ctxt->userData,
3479 tmp, nbchar);
3480 }
3481 }
3482 return;
3483 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003484
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003485get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003486 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003487 while (test_char_data[*in]) {
3488 in++;
3489 ccol++;
3490 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003491 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003492 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003493 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003494 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003495 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003496 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003497 in++;
3498 }
3499 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003500 }
3501 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003502 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003504 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003505 return;
3506 }
3507 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003508 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003509 goto get_more;
3510 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003511 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003512 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003513 if ((ctxt->sax != NULL) &&
3514 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003515 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003516 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003517 const xmlChar *tmp = ctxt->input->cur;
3518 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003519
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003520 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003521 if (ctxt->sax->ignorableWhitespace != NULL)
3522 ctxt->sax->ignorableWhitespace(ctxt->userData,
3523 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003524 } else if (ctxt->sax->characters != NULL)
3525 ctxt->sax->characters(ctxt->userData,
3526 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003527 line = ctxt->input->line;
3528 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003529 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003530 if (ctxt->sax->characters != NULL)
3531 ctxt->sax->characters(ctxt->userData,
3532 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003533 line = ctxt->input->line;
3534 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003535 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003536 }
3537 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003538 if (*in == 0xD) {
3539 in++;
3540 if (*in == 0xA) {
3541 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003542 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003543 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003544 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003545 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003546 in--;
3547 }
3548 if (*in == '<') {
3549 return;
3550 }
3551 if (*in == '&') {
3552 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003553 }
3554 SHRINK;
3555 GROW;
3556 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003557 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003558 nbchar = 0;
3559 }
Daniel Veillard50582112001-03-26 22:52:16 +00003560 ctxt->input->line = line;
3561 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003562 xmlParseCharDataComplex(ctxt, cdata);
3563}
3564
Daniel Veillard01c13b52002-12-10 15:19:08 +00003565/**
3566 * xmlParseCharDataComplex:
3567 * @ctxt: an XML parser context
3568 * @cdata: int indicating whether we are within a CDATA section
3569 *
3570 * parse a CharData section.this is the fallback function
3571 * of xmlParseCharData() when the parsing requires handling
3572 * of non-ASCII characters.
3573 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003574void
3575xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003576 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3577 int nbchar = 0;
3578 int cur, l;
3579 int count = 0;
3580
3581 SHRINK;
3582 GROW;
3583 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003584 while ((cur != '<') && /* checked */
3585 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003586 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003587 if ((cur == ']') && (NXT(1) == ']') &&
3588 (NXT(2) == '>')) {
3589 if (cdata) break;
3590 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003592 }
3593 }
3594 COPY_BUF(l,buf,nbchar,cur);
3595 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003596 buf[nbchar] = 0;
3597
Owen Taylor3473f882001-02-23 17:55:21 +00003598 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003599 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003600 */
3601 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003602 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003603 if (ctxt->sax->ignorableWhitespace != NULL)
3604 ctxt->sax->ignorableWhitespace(ctxt->userData,
3605 buf, nbchar);
3606 } else {
3607 if (ctxt->sax->characters != NULL)
3608 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3609 }
3610 }
3611 nbchar = 0;
3612 }
3613 count++;
3614 if (count > 50) {
3615 GROW;
3616 count = 0;
3617 }
3618 NEXTL(l);
3619 cur = CUR_CHAR(l);
3620 }
3621 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003622 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003623 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003624 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003625 */
3626 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003627 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003628 if (ctxt->sax->ignorableWhitespace != NULL)
3629 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3630 } else {
3631 if (ctxt->sax->characters != NULL)
3632 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3633 }
3634 }
3635 }
3636}
3637
3638/**
3639 * xmlParseExternalID:
3640 * @ctxt: an XML parser context
3641 * @publicID: a xmlChar** receiving PubidLiteral
3642 * @strict: indicate whether we should restrict parsing to only
3643 * production [75], see NOTE below
3644 *
3645 * Parse an External ID or a Public ID
3646 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003647 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003648 * 'PUBLIC' S PubidLiteral S SystemLiteral
3649 *
3650 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3651 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3652 *
3653 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3654 *
3655 * Returns the function returns SystemLiteral and in the second
3656 * case publicID receives PubidLiteral, is strict is off
3657 * it is possible to return NULL and have publicID set.
3658 */
3659
3660xmlChar *
3661xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3662 xmlChar *URI = NULL;
3663
3664 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003665
3666 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003667 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003668 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003669 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003670 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3671 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
3673 SKIP_BLANKS;
3674 URI = xmlParseSystemLiteral(ctxt);
3675 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003676 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003677 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003678 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003679 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003680 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003681 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003682 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003683 }
3684 SKIP_BLANKS;
3685 *publicID = xmlParsePubidLiteral(ctxt);
3686 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003687 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 }
3689 if (strict) {
3690 /*
3691 * We don't handle [83] so "S SystemLiteral" is required.
3692 */
William M. Brack76e95df2003-10-18 16:20:14 +00003693 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003695 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003696 }
3697 } else {
3698 /*
3699 * We handle [83] so we return immediately, if
3700 * "S SystemLiteral" is not detected. From a purely parsing
3701 * point of view that's a nice mess.
3702 */
3703 const xmlChar *ptr;
3704 GROW;
3705
3706 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003707 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003708
William M. Brack76e95df2003-10-18 16:20:14 +00003709 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003710 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3711 }
3712 SKIP_BLANKS;
3713 URI = xmlParseSystemLiteral(ctxt);
3714 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
3717 }
3718 return(URI);
3719}
3720
3721/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003722 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003723 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003724 * @buf: the already parsed part of the buffer
3725 * @len: number of bytes filles in the buffer
3726 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003727 *
3728 * Skip an XML (SGML) comment <!-- .... -->
3729 * The spec says that "For compatibility, the string "--" (double-hyphen)
3730 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003731 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003732 *
3733 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3734 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003735static void
3736xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003737 int q, ql;
3738 int r, rl;
3739 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003740 xmlParserInputPtr input = ctxt->input;
3741 int count = 0;
3742
Owen Taylor3473f882001-02-23 17:55:21 +00003743 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003744 len = 0;
3745 size = XML_PARSER_BUFFER_SIZE;
3746 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3747 if (buf == NULL) {
3748 xmlErrMemory(ctxt, NULL);
3749 return;
3750 }
Owen Taylor3473f882001-02-23 17:55:21 +00003751 }
3752 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003753 if (q == 0)
3754 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003755 NEXTL(ql);
3756 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003757 if (r == 0)
3758 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003759 NEXTL(rl);
3760 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003761 if (cur == 0)
3762 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003763 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003764 ((cur != '>') ||
3765 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003766 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003767 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003768 }
3769 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003770 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003771 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003772 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3773 if (new_buf == NULL) {
3774 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003775 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003776 return;
3777 }
William M. Bracka3215c72004-07-31 16:24:01 +00003778 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003779 }
3780 COPY_BUF(ql,buf,len,q);
3781 q = r;
3782 ql = rl;
3783 r = cur;
3784 rl = l;
3785
3786 count++;
3787 if (count > 50) {
3788 GROW;
3789 count = 0;
3790 }
3791 NEXTL(l);
3792 cur = CUR_CHAR(l);
3793 if (cur == 0) {
3794 SHRINK;
3795 GROW;
3796 cur = CUR_CHAR(l);
3797 }
3798 }
3799 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003800 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003801 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003802 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003803 xmlFree(buf);
3804 } else {
3805 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003806 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3807 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003808 }
3809 NEXT;
3810 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3811 (!ctxt->disableSAX))
3812 ctxt->sax->comment(ctxt->userData, buf);
3813 xmlFree(buf);
3814 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003815 return;
3816not_terminated:
3817 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3818 "Comment not terminated\n", NULL);
3819 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003820}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003821/**
3822 * xmlParseComment:
3823 * @ctxt: an XML parser context
3824 *
3825 * Skip an XML (SGML) comment <!-- .... -->
3826 * The spec says that "For compatibility, the string "--" (double-hyphen)
3827 * must not occur within comments. "
3828 *
3829 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3830 */
3831void
3832xmlParseComment(xmlParserCtxtPtr ctxt) {
3833 xmlChar *buf = NULL;
3834 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003835 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003836 xmlParserInputState state;
3837 const xmlChar *in;
3838 int nbchar = 0, ccol;
3839
3840 /*
3841 * Check that there is a comment right here.
3842 */
3843 if ((RAW != '<') || (NXT(1) != '!') ||
3844 (NXT(2) != '-') || (NXT(3) != '-')) return;
3845
3846 state = ctxt->instate;
3847 ctxt->instate = XML_PARSER_COMMENT;
3848 SKIP(4);
3849 SHRINK;
3850 GROW;
3851
3852 /*
3853 * Accelerated common case where input don't need to be
3854 * modified before passing it to the handler.
3855 */
3856 in = ctxt->input->cur;
3857 do {
3858 if (*in == 0xA) {
3859 ctxt->input->line++; ctxt->input->col = 1;
3860 in++;
3861 while (*in == 0xA) {
3862 ctxt->input->line++; ctxt->input->col = 1;
3863 in++;
3864 }
3865 }
3866get_more:
3867 ccol = ctxt->input->col;
3868 while (((*in > '-') && (*in <= 0x7F)) ||
3869 ((*in >= 0x20) && (*in < '-')) ||
3870 (*in == 0x09)) {
3871 in++;
3872 ccol++;
3873 }
3874 ctxt->input->col = ccol;
3875 if (*in == 0xA) {
3876 ctxt->input->line++; ctxt->input->col = 1;
3877 in++;
3878 while (*in == 0xA) {
3879 ctxt->input->line++; ctxt->input->col = 1;
3880 in++;
3881 }
3882 goto get_more;
3883 }
3884 nbchar = in - ctxt->input->cur;
3885 /*
3886 * save current set of data
3887 */
3888 if (nbchar > 0) {
3889 if ((ctxt->sax != NULL) &&
3890 (ctxt->sax->comment != NULL)) {
3891 if (buf == NULL) {
3892 if ((*in == '-') && (in[1] == '-'))
3893 size = nbchar + 1;
3894 else
3895 size = XML_PARSER_BUFFER_SIZE + nbchar;
3896 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3897 if (buf == NULL) {
3898 xmlErrMemory(ctxt, NULL);
3899 ctxt->instate = state;
3900 return;
3901 }
3902 len = 0;
3903 } else if (len + nbchar + 1 >= size) {
3904 xmlChar *new_buf;
3905 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3906 new_buf = (xmlChar *) xmlRealloc(buf,
3907 size * sizeof(xmlChar));
3908 if (new_buf == NULL) {
3909 xmlFree (buf);
3910 xmlErrMemory(ctxt, NULL);
3911 ctxt->instate = state;
3912 return;
3913 }
3914 buf = new_buf;
3915 }
3916 memcpy(&buf[len], ctxt->input->cur, nbchar);
3917 len += nbchar;
3918 buf[len] = 0;
3919 }
3920 }
3921 ctxt->input->cur = in;
3922 if (*in == 0xA)
3923
3924 if (*in == 0xD) {
3925 in++;
3926 if (*in == 0xA) {
3927 ctxt->input->cur = in;
3928 in++;
3929 ctxt->input->line++; ctxt->input->col = 1;
3930 continue; /* while */
3931 }
3932 in--;
3933 }
3934 SHRINK;
3935 GROW;
3936 in = ctxt->input->cur;
3937 if (*in == '-') {
3938 if (in[1] == '-') {
3939 if (in[2] == '>') {
3940 SKIP(3);
3941 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3942 (!ctxt->disableSAX)) {
3943 if (buf != NULL)
3944 ctxt->sax->comment(ctxt->userData, buf);
3945 else
3946 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3947 }
3948 if (buf != NULL)
3949 xmlFree(buf);
3950 ctxt->instate = state;
3951 return;
3952 }
3953 if (buf != NULL)
3954 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3955 "Comment not terminated \n<!--%.50s\n",
3956 buf);
3957 else
3958 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3959 "Comment not terminated \n", NULL);
3960 in++;
3961 ctxt->input->col++;
3962 }
3963 in++;
3964 ctxt->input->col++;
3965 goto get_more;
3966 }
3967 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3968 xmlParseCommentComplex(ctxt, buf, len, size);
3969 ctxt->instate = state;
3970 return;
3971}
3972
Owen Taylor3473f882001-02-23 17:55:21 +00003973
3974/**
3975 * xmlParsePITarget:
3976 * @ctxt: an XML parser context
3977 *
3978 * parse the name of a PI
3979 *
3980 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3981 *
3982 * Returns the PITarget name or NULL
3983 */
3984
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003985const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003986xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003987 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003988
3989 name = xmlParseName(ctxt);
3990 if ((name != NULL) &&
3991 ((name[0] == 'x') || (name[0] == 'X')) &&
3992 ((name[1] == 'm') || (name[1] == 'M')) &&
3993 ((name[2] == 'l') || (name[2] == 'L'))) {
3994 int i;
3995 if ((name[0] == 'x') && (name[1] == 'm') &&
3996 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003997 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003998 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003999 return(name);
4000 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004001 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 return(name);
4003 }
4004 for (i = 0;;i++) {
4005 if (xmlW3CPIs[i] == NULL) break;
4006 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4007 return(name);
4008 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004009 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4010 "xmlParsePITarget: invalid name prefix 'xml'\n",
4011 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004012 }
4013 return(name);
4014}
4015
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004016#ifdef LIBXML_CATALOG_ENABLED
4017/**
4018 * xmlParseCatalogPI:
4019 * @ctxt: an XML parser context
4020 * @catalog: the PI value string
4021 *
4022 * parse an XML Catalog Processing Instruction.
4023 *
4024 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4025 *
4026 * Occurs only if allowed by the user and if happening in the Misc
4027 * part of the document before any doctype informations
4028 * This will add the given catalog to the parsing context in order
4029 * to be used if there is a resolution need further down in the document
4030 */
4031
4032static void
4033xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4034 xmlChar *URL = NULL;
4035 const xmlChar *tmp, *base;
4036 xmlChar marker;
4037
4038 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004039 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004040 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4041 goto error;
4042 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004043 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004044 if (*tmp != '=') {
4045 return;
4046 }
4047 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004048 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004049 marker = *tmp;
4050 if ((marker != '\'') && (marker != '"'))
4051 goto error;
4052 tmp++;
4053 base = tmp;
4054 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4055 if (*tmp == 0)
4056 goto error;
4057 URL = xmlStrndup(base, tmp - base);
4058 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004059 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004060 if (*tmp != 0)
4061 goto error;
4062
4063 if (URL != NULL) {
4064 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4065 xmlFree(URL);
4066 }
4067 return;
4068
4069error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004070 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4071 "Catalog PI syntax error: %s\n",
4072 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004073 if (URL != NULL)
4074 xmlFree(URL);
4075}
4076#endif
4077
Owen Taylor3473f882001-02-23 17:55:21 +00004078/**
4079 * xmlParsePI:
4080 * @ctxt: an XML parser context
4081 *
4082 * parse an XML Processing Instruction.
4083 *
4084 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4085 *
4086 * The processing is transfered to SAX once parsed.
4087 */
4088
4089void
4090xmlParsePI(xmlParserCtxtPtr ctxt) {
4091 xmlChar *buf = NULL;
4092 int len = 0;
4093 int size = XML_PARSER_BUFFER_SIZE;
4094 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004095 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004096 xmlParserInputState state;
4097 int count = 0;
4098
4099 if ((RAW == '<') && (NXT(1) == '?')) {
4100 xmlParserInputPtr input = ctxt->input;
4101 state = ctxt->instate;
4102 ctxt->instate = XML_PARSER_PI;
4103 /*
4104 * this is a Processing Instruction.
4105 */
4106 SKIP(2);
4107 SHRINK;
4108
4109 /*
4110 * Parse the target name and check for special support like
4111 * namespace.
4112 */
4113 target = xmlParsePITarget(ctxt);
4114 if (target != NULL) {
4115 if ((RAW == '?') && (NXT(1) == '>')) {
4116 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004117 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4118 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004119 }
4120 SKIP(2);
4121
4122 /*
4123 * SAX: PI detected.
4124 */
4125 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4126 (ctxt->sax->processingInstruction != NULL))
4127 ctxt->sax->processingInstruction(ctxt->userData,
4128 target, NULL);
4129 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 return;
4131 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004132 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004133 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004134 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004135 ctxt->instate = state;
4136 return;
4137 }
4138 cur = CUR;
4139 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004140 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4141 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 }
4143 SKIP_BLANKS;
4144 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004145 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004146 ((cur != '?') || (NXT(1) != '>'))) {
4147 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004148 xmlChar *tmp;
4149
Owen Taylor3473f882001-02-23 17:55:21 +00004150 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004151 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4152 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004153 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004154 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004155 ctxt->instate = state;
4156 return;
4157 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004158 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004159 }
4160 count++;
4161 if (count > 50) {
4162 GROW;
4163 count = 0;
4164 }
4165 COPY_BUF(l,buf,len,cur);
4166 NEXTL(l);
4167 cur = CUR_CHAR(l);
4168 if (cur == 0) {
4169 SHRINK;
4170 GROW;
4171 cur = CUR_CHAR(l);
4172 }
4173 }
4174 buf[len] = 0;
4175 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004176 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4177 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004178 } else {
4179 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004180 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4181 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004182 }
4183 SKIP(2);
4184
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004185#ifdef LIBXML_CATALOG_ENABLED
4186 if (((state == XML_PARSER_MISC) ||
4187 (state == XML_PARSER_START)) &&
4188 (xmlStrEqual(target, XML_CATALOG_PI))) {
4189 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4190 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4191 (allow == XML_CATA_ALLOW_ALL))
4192 xmlParseCatalogPI(ctxt, buf);
4193 }
4194#endif
4195
4196
Owen Taylor3473f882001-02-23 17:55:21 +00004197 /*
4198 * SAX: PI detected.
4199 */
4200 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4201 (ctxt->sax->processingInstruction != NULL))
4202 ctxt->sax->processingInstruction(ctxt->userData,
4203 target, buf);
4204 }
4205 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004206 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004207 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004208 }
4209 ctxt->instate = state;
4210 }
4211}
4212
4213/**
4214 * xmlParseNotationDecl:
4215 * @ctxt: an XML parser context
4216 *
4217 * parse a notation declaration
4218 *
4219 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4220 *
4221 * Hence there is actually 3 choices:
4222 * 'PUBLIC' S PubidLiteral
4223 * 'PUBLIC' S PubidLiteral S SystemLiteral
4224 * and 'SYSTEM' S SystemLiteral
4225 *
4226 * See the NOTE on xmlParseExternalID().
4227 */
4228
4229void
4230xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004231 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004232 xmlChar *Pubid;
4233 xmlChar *Systemid;
4234
Daniel Veillarda07050d2003-10-19 14:46:32 +00004235 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004236 xmlParserInputPtr input = ctxt->input;
4237 SHRINK;
4238 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004239 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004240 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4241 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004242 return;
4243 }
4244 SKIP_BLANKS;
4245
Daniel Veillard76d66f42001-05-16 21:05:17 +00004246 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004248 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004249 return;
4250 }
William M. Brack76e95df2003-10-18 16:20:14 +00004251 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004253 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 return;
4255 }
4256 SKIP_BLANKS;
4257
4258 /*
4259 * Parse the IDs.
4260 */
4261 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4262 SKIP_BLANKS;
4263
4264 if (RAW == '>') {
4265 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4267 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 NEXT;
4270 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4271 (ctxt->sax->notationDecl != NULL))
4272 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4273 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004274 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004275 }
Owen Taylor3473f882001-02-23 17:55:21 +00004276 if (Systemid != NULL) xmlFree(Systemid);
4277 if (Pubid != NULL) xmlFree(Pubid);
4278 }
4279}
4280
4281/**
4282 * xmlParseEntityDecl:
4283 * @ctxt: an XML parser context
4284 *
4285 * parse <!ENTITY declarations
4286 *
4287 * [70] EntityDecl ::= GEDecl | PEDecl
4288 *
4289 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4290 *
4291 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4292 *
4293 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4294 *
4295 * [74] PEDef ::= EntityValue | ExternalID
4296 *
4297 * [76] NDataDecl ::= S 'NDATA' S Name
4298 *
4299 * [ VC: Notation Declared ]
4300 * The Name must match the declared name of a notation.
4301 */
4302
4303void
4304xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004305 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004306 xmlChar *value = NULL;
4307 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004308 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004309 int isParameter = 0;
4310 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004311 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004312
Daniel Veillard4c778d82005-01-23 17:37:44 +00004313 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004314 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004315 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 SHRINK;
4317 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004318 skipped = SKIP_BLANKS;
4319 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4321 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004322 }
Owen Taylor3473f882001-02-23 17:55:21 +00004323
4324 if (RAW == '%') {
4325 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004326 skipped = SKIP_BLANKS;
4327 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004328 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4329 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
Owen Taylor3473f882001-02-23 17:55:21 +00004331 isParameter = 1;
4332 }
4333
Daniel Veillard76d66f42001-05-16 21:05:17 +00004334 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004335 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004336 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4337 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004338 return;
4339 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004340 skipped = SKIP_BLANKS;
4341 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004342 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4343 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004344 }
Owen Taylor3473f882001-02-23 17:55:21 +00004345
Daniel Veillardf5582f12002-06-11 10:08:16 +00004346 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004347 /*
4348 * handle the various case of definitions...
4349 */
4350 if (isParameter) {
4351 if ((RAW == '"') || (RAW == '\'')) {
4352 value = xmlParseEntityValue(ctxt, &orig);
4353 if (value) {
4354 if ((ctxt->sax != NULL) &&
4355 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4356 ctxt->sax->entityDecl(ctxt->userData, name,
4357 XML_INTERNAL_PARAMETER_ENTITY,
4358 NULL, NULL, value);
4359 }
4360 } else {
4361 URI = xmlParseExternalID(ctxt, &literal, 1);
4362 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004363 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 if (URI) {
4366 xmlURIPtr uri;
4367
4368 uri = xmlParseURI((const char *) URI);
4369 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004370 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4371 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004372 /*
4373 * This really ought to be a well formedness error
4374 * but the XML Core WG decided otherwise c.f. issue
4375 * E26 of the XML erratas.
4376 */
Owen Taylor3473f882001-02-23 17:55:21 +00004377 } else {
4378 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004379 /*
4380 * Okay this is foolish to block those but not
4381 * invalid URIs.
4382 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004383 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004384 } else {
4385 if ((ctxt->sax != NULL) &&
4386 (!ctxt->disableSAX) &&
4387 (ctxt->sax->entityDecl != NULL))
4388 ctxt->sax->entityDecl(ctxt->userData, name,
4389 XML_EXTERNAL_PARAMETER_ENTITY,
4390 literal, URI, NULL);
4391 }
4392 xmlFreeURI(uri);
4393 }
4394 }
4395 }
4396 } else {
4397 if ((RAW == '"') || (RAW == '\'')) {
4398 value = xmlParseEntityValue(ctxt, &orig);
4399 if ((ctxt->sax != NULL) &&
4400 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4401 ctxt->sax->entityDecl(ctxt->userData, name,
4402 XML_INTERNAL_GENERAL_ENTITY,
4403 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004404 /*
4405 * For expat compatibility in SAX mode.
4406 */
4407 if ((ctxt->myDoc == NULL) ||
4408 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4409 if (ctxt->myDoc == NULL) {
4410 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4411 }
4412 if (ctxt->myDoc->intSubset == NULL)
4413 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4414 BAD_CAST "fake", NULL, NULL);
4415
Daniel Veillard1af9a412003-08-20 22:54:39 +00004416 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4417 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004418 }
Owen Taylor3473f882001-02-23 17:55:21 +00004419 } else {
4420 URI = xmlParseExternalID(ctxt, &literal, 1);
4421 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004422 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004423 }
4424 if (URI) {
4425 xmlURIPtr uri;
4426
4427 uri = xmlParseURI((const char *)URI);
4428 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004429 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4430 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004431 /*
4432 * This really ought to be a well formedness error
4433 * but the XML Core WG decided otherwise c.f. issue
4434 * E26 of the XML erratas.
4435 */
Owen Taylor3473f882001-02-23 17:55:21 +00004436 } else {
4437 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004438 /*
4439 * Okay this is foolish to block those but not
4440 * invalid URIs.
4441 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004442 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 xmlFreeURI(uri);
4445 }
4446 }
William M. Brack76e95df2003-10-18 16:20:14 +00004447 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4449 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004450 }
4451 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004452 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004453 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4456 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004457 }
4458 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004459 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004460 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4461 (ctxt->sax->unparsedEntityDecl != NULL))
4462 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4463 literal, URI, ndata);
4464 } else {
4465 if ((ctxt->sax != NULL) &&
4466 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4467 ctxt->sax->entityDecl(ctxt->userData, name,
4468 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4469 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004470 /*
4471 * For expat compatibility in SAX mode.
4472 * assuming the entity repalcement was asked for
4473 */
4474 if ((ctxt->replaceEntities != 0) &&
4475 ((ctxt->myDoc == NULL) ||
4476 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4477 if (ctxt->myDoc == NULL) {
4478 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4479 }
4480
4481 if (ctxt->myDoc->intSubset == NULL)
4482 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4483 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004484 xmlSAX2EntityDecl(ctxt, name,
4485 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4486 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004487 }
Owen Taylor3473f882001-02-23 17:55:21 +00004488 }
4489 }
4490 }
4491 SKIP_BLANKS;
4492 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004493 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004494 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004495 } else {
4496 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004497 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4498 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004499 }
4500 NEXT;
4501 }
4502 if (orig != NULL) {
4503 /*
4504 * Ugly mechanism to save the raw entity value.
4505 */
4506 xmlEntityPtr cur = NULL;
4507
4508 if (isParameter) {
4509 if ((ctxt->sax != NULL) &&
4510 (ctxt->sax->getParameterEntity != NULL))
4511 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4512 } else {
4513 if ((ctxt->sax != NULL) &&
4514 (ctxt->sax->getEntity != NULL))
4515 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004516 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004517 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004518 }
Owen Taylor3473f882001-02-23 17:55:21 +00004519 }
4520 if (cur != NULL) {
4521 if (cur->orig != NULL)
4522 xmlFree(orig);
4523 else
4524 cur->orig = orig;
4525 } else
4526 xmlFree(orig);
4527 }
Owen Taylor3473f882001-02-23 17:55:21 +00004528 if (value != NULL) xmlFree(value);
4529 if (URI != NULL) xmlFree(URI);
4530 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004531 }
4532}
4533
4534/**
4535 * xmlParseDefaultDecl:
4536 * @ctxt: an XML parser context
4537 * @value: Receive a possible fixed default value for the attribute
4538 *
4539 * Parse an attribute default declaration
4540 *
4541 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4542 *
4543 * [ VC: Required Attribute ]
4544 * if the default declaration is the keyword #REQUIRED, then the
4545 * attribute must be specified for all elements of the type in the
4546 * attribute-list declaration.
4547 *
4548 * [ VC: Attribute Default Legal ]
4549 * The declared default value must meet the lexical constraints of
4550 * the declared attribute type c.f. xmlValidateAttributeDecl()
4551 *
4552 * [ VC: Fixed Attribute Default ]
4553 * if an attribute has a default value declared with the #FIXED
4554 * keyword, instances of that attribute must match the default value.
4555 *
4556 * [ WFC: No < in Attribute Values ]
4557 * handled in xmlParseAttValue()
4558 *
4559 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4560 * or XML_ATTRIBUTE_FIXED.
4561 */
4562
4563int
4564xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4565 int val;
4566 xmlChar *ret;
4567
4568 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004569 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SKIP(9);
4571 return(XML_ATTRIBUTE_REQUIRED);
4572 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004573 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004574 SKIP(8);
4575 return(XML_ATTRIBUTE_IMPLIED);
4576 }
4577 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004578 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004579 SKIP(6);
4580 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004581 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004582 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4583 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004584 }
4585 SKIP_BLANKS;
4586 }
4587 ret = xmlParseAttValue(ctxt);
4588 ctxt->instate = XML_PARSER_DTD;
4589 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004590 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004591 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004592 } else
4593 *value = ret;
4594 return(val);
4595}
4596
4597/**
4598 * xmlParseNotationType:
4599 * @ctxt: an XML parser context
4600 *
4601 * parse an Notation attribute type.
4602 *
4603 * Note: the leading 'NOTATION' S part has already being parsed...
4604 *
4605 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4606 *
4607 * [ VC: Notation Attributes ]
4608 * Values of this type must match one of the notation names included
4609 * in the declaration; all notation names in the declaration must be declared.
4610 *
4611 * Returns: the notation attribute tree built while parsing
4612 */
4613
4614xmlEnumerationPtr
4615xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004616 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004617 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4618
4619 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 return(NULL);
4622 }
4623 SHRINK;
4624 do {
4625 NEXT;
4626 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004627 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004628 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4630 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 return(ret);
4632 }
4633 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004634 if (cur == NULL) return(ret);
4635 if (last == NULL) ret = last = cur;
4636 else {
4637 last->next = cur;
4638 last = cur;
4639 }
4640 SKIP_BLANKS;
4641 } while (RAW == '|');
4642 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004643 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004644 if ((last != NULL) && (last != ret))
4645 xmlFreeEnumeration(last);
4646 return(ret);
4647 }
4648 NEXT;
4649 return(ret);
4650}
4651
4652/**
4653 * xmlParseEnumerationType:
4654 * @ctxt: an XML parser context
4655 *
4656 * parse an Enumeration attribute type.
4657 *
4658 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4659 *
4660 * [ VC: Enumeration ]
4661 * Values of this type must match one of the Nmtoken tokens in
4662 * the declaration
4663 *
4664 * Returns: the enumeration attribute tree built while parsing
4665 */
4666
4667xmlEnumerationPtr
4668xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4669 xmlChar *name;
4670 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4671
4672 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004673 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004674 return(NULL);
4675 }
4676 SHRINK;
4677 do {
4678 NEXT;
4679 SKIP_BLANKS;
4680 name = xmlParseNmtoken(ctxt);
4681 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004682 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 return(ret);
4684 }
4685 cur = xmlCreateEnumeration(name);
4686 xmlFree(name);
4687 if (cur == NULL) return(ret);
4688 if (last == NULL) ret = last = cur;
4689 else {
4690 last->next = cur;
4691 last = cur;
4692 }
4693 SKIP_BLANKS;
4694 } while (RAW == '|');
4695 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004696 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 return(ret);
4698 }
4699 NEXT;
4700 return(ret);
4701}
4702
4703/**
4704 * xmlParseEnumeratedType:
4705 * @ctxt: an XML parser context
4706 * @tree: the enumeration tree built while parsing
4707 *
4708 * parse an Enumerated attribute type.
4709 *
4710 * [57] EnumeratedType ::= NotationType | Enumeration
4711 *
4712 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4713 *
4714 *
4715 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4716 */
4717
4718int
4719xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004720 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004721 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004722 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004723 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4724 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004725 return(0);
4726 }
4727 SKIP_BLANKS;
4728 *tree = xmlParseNotationType(ctxt);
4729 if (*tree == NULL) return(0);
4730 return(XML_ATTRIBUTE_NOTATION);
4731 }
4732 *tree = xmlParseEnumerationType(ctxt);
4733 if (*tree == NULL) return(0);
4734 return(XML_ATTRIBUTE_ENUMERATION);
4735}
4736
4737/**
4738 * xmlParseAttributeType:
4739 * @ctxt: an XML parser context
4740 * @tree: the enumeration tree built while parsing
4741 *
4742 * parse the Attribute list def for an element
4743 *
4744 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4745 *
4746 * [55] StringType ::= 'CDATA'
4747 *
4748 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4749 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4750 *
4751 * Validity constraints for attribute values syntax are checked in
4752 * xmlValidateAttributeValue()
4753 *
4754 * [ VC: ID ]
4755 * Values of type ID must match the Name production. A name must not
4756 * appear more than once in an XML document as a value of this type;
4757 * i.e., ID values must uniquely identify the elements which bear them.
4758 *
4759 * [ VC: One ID per Element Type ]
4760 * No element type may have more than one ID attribute specified.
4761 *
4762 * [ VC: ID Attribute Default ]
4763 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4764 *
4765 * [ VC: IDREF ]
4766 * Values of type IDREF must match the Name production, and values
4767 * of type IDREFS must match Names; each IDREF Name must match the value
4768 * of an ID attribute on some element in the XML document; i.e. IDREF
4769 * values must match the value of some ID attribute.
4770 *
4771 * [ VC: Entity Name ]
4772 * Values of type ENTITY must match the Name production, values
4773 * of type ENTITIES must match Names; each Entity Name must match the
4774 * name of an unparsed entity declared in the DTD.
4775 *
4776 * [ VC: Name Token ]
4777 * Values of type NMTOKEN must match the Nmtoken production; values
4778 * of type NMTOKENS must match Nmtokens.
4779 *
4780 * Returns the attribute type
4781 */
4782int
4783xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4784 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004785 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004786 SKIP(5);
4787 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004788 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004789 SKIP(6);
4790 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004791 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004792 SKIP(5);
4793 return(XML_ATTRIBUTE_IDREF);
4794 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4795 SKIP(2);
4796 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004797 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004798 SKIP(6);
4799 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004800 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004801 SKIP(8);
4802 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004803 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004804 SKIP(8);
4805 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004806 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004807 SKIP(7);
4808 return(XML_ATTRIBUTE_NMTOKEN);
4809 }
4810 return(xmlParseEnumeratedType(ctxt, tree));
4811}
4812
4813/**
4814 * xmlParseAttributeListDecl:
4815 * @ctxt: an XML parser context
4816 *
4817 * : parse the Attribute list def for an element
4818 *
4819 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4820 *
4821 * [53] AttDef ::= S Name S AttType S DefaultDecl
4822 *
4823 */
4824void
4825xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004826 const xmlChar *elemName;
4827 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004828 xmlEnumerationPtr tree;
4829
Daniel Veillarda07050d2003-10-19 14:46:32 +00004830 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004831 xmlParserInputPtr input = ctxt->input;
4832
4833 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004834 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004835 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004836 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004837 }
4838 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004839 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004840 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004841 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4842 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004843 return;
4844 }
4845 SKIP_BLANKS;
4846 GROW;
4847 while (RAW != '>') {
4848 const xmlChar *check = CUR_PTR;
4849 int type;
4850 int def;
4851 xmlChar *defaultValue = NULL;
4852
4853 GROW;
4854 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004855 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004857 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4858 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004859 break;
4860 }
4861 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004862 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004863 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004864 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004865 if (defaultValue != NULL)
4866 xmlFree(defaultValue);
4867 break;
4868 }
4869 SKIP_BLANKS;
4870
4871 type = xmlParseAttributeType(ctxt, &tree);
4872 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004873 if (defaultValue != NULL)
4874 xmlFree(defaultValue);
4875 break;
4876 }
4877
4878 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004879 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004880 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4881 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004882 if (defaultValue != NULL)
4883 xmlFree(defaultValue);
4884 if (tree != NULL)
4885 xmlFreeEnumeration(tree);
4886 break;
4887 }
4888 SKIP_BLANKS;
4889
4890 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4891 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004892 if (defaultValue != NULL)
4893 xmlFree(defaultValue);
4894 if (tree != NULL)
4895 xmlFreeEnumeration(tree);
4896 break;
4897 }
4898
4899 GROW;
4900 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004901 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004902 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004903 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004904 if (defaultValue != NULL)
4905 xmlFree(defaultValue);
4906 if (tree != NULL)
4907 xmlFreeEnumeration(tree);
4908 break;
4909 }
4910 SKIP_BLANKS;
4911 }
4912 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004913 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4914 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004915 if (defaultValue != NULL)
4916 xmlFree(defaultValue);
4917 if (tree != NULL)
4918 xmlFreeEnumeration(tree);
4919 break;
4920 }
4921 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4922 (ctxt->sax->attributeDecl != NULL))
4923 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4924 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004925 else if (tree != NULL)
4926 xmlFreeEnumeration(tree);
4927
4928 if ((ctxt->sax2) && (defaultValue != NULL) &&
4929 (def != XML_ATTRIBUTE_IMPLIED) &&
4930 (def != XML_ATTRIBUTE_REQUIRED)) {
4931 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4932 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004933 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4934 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4935 }
Owen Taylor3473f882001-02-23 17:55:21 +00004936 if (defaultValue != NULL)
4937 xmlFree(defaultValue);
4938 GROW;
4939 }
4940 if (RAW == '>') {
4941 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004942 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4943 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004944 }
4945 NEXT;
4946 }
Owen Taylor3473f882001-02-23 17:55:21 +00004947 }
4948}
4949
4950/**
4951 * xmlParseElementMixedContentDecl:
4952 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004953 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004954 *
4955 * parse the declaration for a Mixed Element content
4956 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4957 *
4958 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4959 * '(' S? '#PCDATA' S? ')'
4960 *
4961 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4962 *
4963 * [ VC: No Duplicate Types ]
4964 * The same name must not appear more than once in a single
4965 * mixed-content declaration.
4966 *
4967 * returns: the list of the xmlElementContentPtr describing the element choices
4968 */
4969xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004970xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004971 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004972 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004973
4974 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004975 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004976 SKIP(7);
4977 SKIP_BLANKS;
4978 SHRINK;
4979 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004980 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004981 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4982"Element content declaration doesn't start and stop in the same entity\n",
4983 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004984 }
Owen Taylor3473f882001-02-23 17:55:21 +00004985 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004986 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (RAW == '*') {
4988 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4989 NEXT;
4990 }
4991 return(ret);
4992 }
4993 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004994 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 if (ret == NULL) return(NULL);
4996 }
4997 while (RAW == '|') {
4998 NEXT;
4999 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005000 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005001 if (ret == NULL) return(NULL);
5002 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005003 if (cur != NULL)
5004 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005005 cur = ret;
5006 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005007 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005008 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005009 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005010 if (n->c1 != NULL)
5011 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005012 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005013 if (n != NULL)
5014 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005015 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 }
5017 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005018 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005019 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005021 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005022 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005023 return(NULL);
5024 }
5025 SKIP_BLANKS;
5026 GROW;
5027 }
5028 if ((RAW == ')') && (NXT(1) == '*')) {
5029 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005030 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005031 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005032 if (cur->c2 != NULL)
5033 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005034 }
5035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005037 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5038"Element content declaration doesn't start and stop in the same entity\n",
5039 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005040 }
Owen Taylor3473f882001-02-23 17:55:21 +00005041 SKIP(2);
5042 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005043 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005044 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005045 return(NULL);
5046 }
5047
5048 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005049 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 }
5051 return(ret);
5052}
5053
5054/**
5055 * xmlParseElementChildrenContentDecl:
5056 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005057 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005058 *
5059 * parse the declaration for a Mixed Element content
5060 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5061 *
5062 *
5063 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5064 *
5065 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5066 *
5067 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5068 *
5069 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5070 *
5071 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5072 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005073 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005074 * opening or closing parentheses in a choice, seq, or Mixed
5075 * construct is contained in the replacement text for a parameter
5076 * entity, both must be contained in the same replacement text. For
5077 * interoperability, if a parameter-entity reference appears in a
5078 * choice, seq, or Mixed construct, its replacement text should not
5079 * be empty, and neither the first nor last non-blank character of
5080 * the replacement text should be a connector (| or ,).
5081 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005082 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005083 * hierarchy.
5084 */
5085xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005086xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005087 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005088 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005089 xmlChar type = 0;
5090
5091 SKIP_BLANKS;
5092 GROW;
5093 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005094 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005095
Owen Taylor3473f882001-02-23 17:55:21 +00005096 /* Recurse on first child */
5097 NEXT;
5098 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005099 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005100 SKIP_BLANKS;
5101 GROW;
5102 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005103 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005104 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005105 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005106 return(NULL);
5107 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005108 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005109 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005110 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005111 return(NULL);
5112 }
Owen Taylor3473f882001-02-23 17:55:21 +00005113 GROW;
5114 if (RAW == '?') {
5115 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5116 NEXT;
5117 } else if (RAW == '*') {
5118 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5119 NEXT;
5120 } else if (RAW == '+') {
5121 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5122 NEXT;
5123 } else {
5124 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5125 }
Owen Taylor3473f882001-02-23 17:55:21 +00005126 GROW;
5127 }
5128 SKIP_BLANKS;
5129 SHRINK;
5130 while (RAW != ')') {
5131 /*
5132 * Each loop we parse one separator and one element.
5133 */
5134 if (RAW == ',') {
5135 if (type == 0) type = CUR;
5136
5137 /*
5138 * Detect "Name | Name , Name" error
5139 */
5140 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005141 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005142 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005144 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005145 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005146 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005147 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005148 return(NULL);
5149 }
5150 NEXT;
5151
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005152 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005154 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005155 xmlFreeDocElementContent(ctxt->myDoc, last);
5156 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 return(NULL);
5158 }
5159 if (last == NULL) {
5160 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005161 if (ret != NULL)
5162 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005163 ret = cur = op;
5164 } else {
5165 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005166 if (op != NULL)
5167 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005169 if (last != NULL)
5170 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005171 cur =op;
5172 last = NULL;
5173 }
5174 } else if (RAW == '|') {
5175 if (type == 0) type = CUR;
5176
5177 /*
5178 * Detect "Name , Name | Name" error
5179 */
5180 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005181 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005182 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005183 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005184 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005185 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005187 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005188 return(NULL);
5189 }
5190 NEXT;
5191
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005192 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005193 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005194 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005195 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005197 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005198 return(NULL);
5199 }
5200 if (last == NULL) {
5201 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005202 if (ret != NULL)
5203 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 ret = cur = op;
5205 } else {
5206 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005207 if (op != NULL)
5208 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005209 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005210 if (last != NULL)
5211 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005212 cur =op;
5213 last = NULL;
5214 }
5215 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005216 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005218 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 return(NULL);
5220 }
5221 GROW;
5222 SKIP_BLANKS;
5223 GROW;
5224 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005225 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005226 /* Recurse on second child */
5227 NEXT;
5228 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005229 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005230 SKIP_BLANKS;
5231 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005232 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005233 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005234 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005235 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 return(NULL);
5238 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005239 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 if (RAW == '?') {
5241 last->ocur = XML_ELEMENT_CONTENT_OPT;
5242 NEXT;
5243 } else if (RAW == '*') {
5244 last->ocur = XML_ELEMENT_CONTENT_MULT;
5245 NEXT;
5246 } else if (RAW == '+') {
5247 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5248 NEXT;
5249 } else {
5250 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5251 }
5252 }
5253 SKIP_BLANKS;
5254 GROW;
5255 }
5256 if ((cur != NULL) && (last != NULL)) {
5257 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005258 if (last != NULL)
5259 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005260 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005261 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005262 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5263"Element content declaration doesn't start and stop in the same entity\n",
5264 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005265 }
Owen Taylor3473f882001-02-23 17:55:21 +00005266 NEXT;
5267 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005268 if (ret != NULL) {
5269 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5270 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5271 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5272 else
5273 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5274 }
Owen Taylor3473f882001-02-23 17:55:21 +00005275 NEXT;
5276 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005277 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005278 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005279 cur = ret;
5280 /*
5281 * Some normalization:
5282 * (a | b* | c?)* == (a | b | c)*
5283 */
5284 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5285 if ((cur->c1 != NULL) &&
5286 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5287 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5288 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5289 if ((cur->c2 != NULL) &&
5290 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5291 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5292 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5293 cur = cur->c2;
5294 }
5295 }
Owen Taylor3473f882001-02-23 17:55:21 +00005296 NEXT;
5297 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005298 if (ret != NULL) {
5299 int found = 0;
5300
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005301 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5302 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5303 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005304 else
5305 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005306 /*
5307 * Some normalization:
5308 * (a | b*)+ == (a | b)*
5309 * (a | b?)+ == (a | b)*
5310 */
5311 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5312 if ((cur->c1 != NULL) &&
5313 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5314 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5315 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5316 found = 1;
5317 }
5318 if ((cur->c2 != NULL) &&
5319 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5320 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5321 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5322 found = 1;
5323 }
5324 cur = cur->c2;
5325 }
5326 if (found)
5327 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5328 }
Owen Taylor3473f882001-02-23 17:55:21 +00005329 NEXT;
5330 }
5331 return(ret);
5332}
5333
5334/**
5335 * xmlParseElementContentDecl:
5336 * @ctxt: an XML parser context
5337 * @name: the name of the element being defined.
5338 * @result: the Element Content pointer will be stored here if any
5339 *
5340 * parse the declaration for an Element content either Mixed or Children,
5341 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5342 *
5343 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5344 *
5345 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5346 */
5347
5348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005349xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005350 xmlElementContentPtr *result) {
5351
5352 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005353 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 int res;
5355
5356 *result = NULL;
5357
5358 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005359 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005360 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005361 return(-1);
5362 }
5363 NEXT;
5364 GROW;
5365 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005366 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005368 res = XML_ELEMENT_TYPE_MIXED;
5369 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005370 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005371 res = XML_ELEMENT_TYPE_ELEMENT;
5372 }
Owen Taylor3473f882001-02-23 17:55:21 +00005373 SKIP_BLANKS;
5374 *result = tree;
5375 return(res);
5376}
5377
5378/**
5379 * xmlParseElementDecl:
5380 * @ctxt: an XML parser context
5381 *
5382 * parse an Element declaration.
5383 *
5384 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5385 *
5386 * [ VC: Unique Element Type Declaration ]
5387 * No element type may be declared more than once
5388 *
5389 * Returns the type of the element, or -1 in case of error
5390 */
5391int
5392xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005393 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005394 int ret = -1;
5395 xmlElementContentPtr content = NULL;
5396
Daniel Veillard4c778d82005-01-23 17:37:44 +00005397 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005398 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005399 xmlParserInputPtr input = ctxt->input;
5400
5401 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005402 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005403 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5404 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005405 }
5406 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005407 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005408 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005409 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5410 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005411 return(-1);
5412 }
5413 while ((RAW == 0) && (ctxt->inputNr > 1))
5414 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005415 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5417 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005418 }
5419 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005420 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005421 SKIP(5);
5422 /*
5423 * Element must always be empty.
5424 */
5425 ret = XML_ELEMENT_TYPE_EMPTY;
5426 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5427 (NXT(2) == 'Y')) {
5428 SKIP(3);
5429 /*
5430 * Element is a generic container.
5431 */
5432 ret = XML_ELEMENT_TYPE_ANY;
5433 } else if (RAW == '(') {
5434 ret = xmlParseElementContentDecl(ctxt, name, &content);
5435 } else {
5436 /*
5437 * [ WFC: PEs in Internal Subset ] error handling.
5438 */
5439 if ((RAW == '%') && (ctxt->external == 0) &&
5440 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005441 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005442 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005443 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005444 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005445 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5446 }
Owen Taylor3473f882001-02-23 17:55:21 +00005447 return(-1);
5448 }
5449
5450 SKIP_BLANKS;
5451 /*
5452 * Pop-up of finished entities.
5453 */
5454 while ((RAW == 0) && (ctxt->inputNr > 1))
5455 xmlPopInput(ctxt);
5456 SKIP_BLANKS;
5457
5458 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005459 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005460 if (content != NULL) {
5461 xmlFreeDocElementContent(ctxt->myDoc, content);
5462 }
Owen Taylor3473f882001-02-23 17:55:21 +00005463 } else {
5464 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5466 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
5468
5469 NEXT;
5470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005471 (ctxt->sax->elementDecl != NULL)) {
5472 if (content != NULL)
5473 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005474 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5475 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005476 if ((content != NULL) && (content->parent == NULL)) {
5477 /*
5478 * this is a trick: if xmlAddElementDecl is called,
5479 * instead of copying the full tree it is plugged directly
5480 * if called from the parser. Avoid duplicating the
5481 * interfaces or change the API/ABI
5482 */
5483 xmlFreeDocElementContent(ctxt->myDoc, content);
5484 }
5485 } else if (content != NULL) {
5486 xmlFreeDocElementContent(ctxt->myDoc, content);
5487 }
Owen Taylor3473f882001-02-23 17:55:21 +00005488 }
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
5490 return(ret);
5491}
5492
5493/**
Owen Taylor3473f882001-02-23 17:55:21 +00005494 * xmlParseConditionalSections
5495 * @ctxt: an XML parser context
5496 *
5497 * [61] conditionalSect ::= includeSect | ignoreSect
5498 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5499 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5500 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5501 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5502 */
5503
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005504static void
Owen Taylor3473f882001-02-23 17:55:21 +00005505xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5506 SKIP(3);
5507 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005508 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005509 SKIP(7);
5510 SKIP_BLANKS;
5511 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005512 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005513 } else {
5514 NEXT;
5515 }
5516 if (xmlParserDebugEntities) {
5517 if ((ctxt->input != NULL) && (ctxt->input->filename))
5518 xmlGenericError(xmlGenericErrorContext,
5519 "%s(%d): ", ctxt->input->filename,
5520 ctxt->input->line);
5521 xmlGenericError(xmlGenericErrorContext,
5522 "Entering INCLUDE Conditional Section\n");
5523 }
5524
5525 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5526 (NXT(2) != '>'))) {
5527 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005528 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005529
5530 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5531 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005532 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005533 NEXT;
5534 } else if (RAW == '%') {
5535 xmlParsePEReference(ctxt);
5536 } else
5537 xmlParseMarkupDecl(ctxt);
5538
5539 /*
5540 * Pop-up of finished entities.
5541 */
5542 while ((RAW == 0) && (ctxt->inputNr > 1))
5543 xmlPopInput(ctxt);
5544
Daniel Veillardfdc91562002-07-01 21:52:03 +00005545 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005546 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 break;
5548 }
5549 }
5550 if (xmlParserDebugEntities) {
5551 if ((ctxt->input != NULL) && (ctxt->input->filename))
5552 xmlGenericError(xmlGenericErrorContext,
5553 "%s(%d): ", ctxt->input->filename,
5554 ctxt->input->line);
5555 xmlGenericError(xmlGenericErrorContext,
5556 "Leaving INCLUDE Conditional Section\n");
5557 }
5558
Daniel Veillarda07050d2003-10-19 14:46:32 +00005559 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005560 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005561 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005562 int depth = 0;
5563
5564 SKIP(6);
5565 SKIP_BLANKS;
5566 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005567 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005568 } else {
5569 NEXT;
5570 }
5571 if (xmlParserDebugEntities) {
5572 if ((ctxt->input != NULL) && (ctxt->input->filename))
5573 xmlGenericError(xmlGenericErrorContext,
5574 "%s(%d): ", ctxt->input->filename,
5575 ctxt->input->line);
5576 xmlGenericError(xmlGenericErrorContext,
5577 "Entering IGNORE Conditional Section\n");
5578 }
5579
5580 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005581 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005582 * But disable SAX event generating DTD building in the meantime
5583 */
5584 state = ctxt->disableSAX;
5585 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005587 ctxt->instate = XML_PARSER_IGNORE;
5588
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005589 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005590 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5591 depth++;
5592 SKIP(3);
5593 continue;
5594 }
5595 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5596 if (--depth >= 0) SKIP(3);
5597 continue;
5598 }
5599 NEXT;
5600 continue;
5601 }
5602
5603 ctxt->disableSAX = state;
5604 ctxt->instate = instate;
5605
5606 if (xmlParserDebugEntities) {
5607 if ((ctxt->input != NULL) && (ctxt->input->filename))
5608 xmlGenericError(xmlGenericErrorContext,
5609 "%s(%d): ", ctxt->input->filename,
5610 ctxt->input->line);
5611 xmlGenericError(xmlGenericErrorContext,
5612 "Leaving IGNORE Conditional Section\n");
5613 }
5614
5615 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005616 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005617 }
5618
5619 if (RAW == 0)
5620 SHRINK;
5621
5622 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005623 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005624 } else {
5625 SKIP(3);
5626 }
5627}
5628
5629/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005630 * xmlParseMarkupDecl:
5631 * @ctxt: an XML parser context
5632 *
5633 * parse Markup declarations
5634 *
5635 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5636 * NotationDecl | PI | Comment
5637 *
5638 * [ VC: Proper Declaration/PE Nesting ]
5639 * Parameter-entity replacement text must be properly nested with
5640 * markup declarations. That is to say, if either the first character
5641 * or the last character of a markup declaration (markupdecl above) is
5642 * contained in the replacement text for a parameter-entity reference,
5643 * both must be contained in the same replacement text.
5644 *
5645 * [ WFC: PEs in Internal Subset ]
5646 * In the internal DTD subset, parameter-entity references can occur
5647 * only where markup declarations can occur, not within markup declarations.
5648 * (This does not apply to references that occur in external parameter
5649 * entities or to the external subset.)
5650 */
5651void
5652xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5653 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005654 if (CUR == '<') {
5655 if (NXT(1) == '!') {
5656 switch (NXT(2)) {
5657 case 'E':
5658 if (NXT(3) == 'L')
5659 xmlParseElementDecl(ctxt);
5660 else if (NXT(3) == 'N')
5661 xmlParseEntityDecl(ctxt);
5662 break;
5663 case 'A':
5664 xmlParseAttributeListDecl(ctxt);
5665 break;
5666 case 'N':
5667 xmlParseNotationDecl(ctxt);
5668 break;
5669 case '-':
5670 xmlParseComment(ctxt);
5671 break;
5672 default:
5673 /* there is an error but it will be detected later */
5674 break;
5675 }
5676 } else if (NXT(1) == '?') {
5677 xmlParsePI(ctxt);
5678 }
5679 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005680 /*
5681 * This is only for internal subset. On external entities,
5682 * the replacement is done before parsing stage
5683 */
5684 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5685 xmlParsePEReference(ctxt);
5686
5687 /*
5688 * Conditional sections are allowed from entities included
5689 * by PE References in the internal subset.
5690 */
5691 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5692 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5693 xmlParseConditionalSections(ctxt);
5694 }
5695 }
5696
5697 ctxt->instate = XML_PARSER_DTD;
5698}
5699
5700/**
5701 * xmlParseTextDecl:
5702 * @ctxt: an XML parser context
5703 *
5704 * parse an XML declaration header for external entities
5705 *
5706 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5707 *
5708 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5709 */
5710
5711void
5712xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5713 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005714 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005715
5716 /*
5717 * We know that '<?xml' is here.
5718 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005719 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005720 SKIP(5);
5721 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005722 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005723 return;
5724 }
5725
William M. Brack76e95df2003-10-18 16:20:14 +00005726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5728 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005729 }
5730 SKIP_BLANKS;
5731
5732 /*
5733 * We may have the VersionInfo here.
5734 */
5735 version = xmlParseVersionInfo(ctxt);
5736 if (version == NULL)
5737 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005738 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5741 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005742 }
5743 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005744 ctxt->input->version = version;
5745
5746 /*
5747 * We must have the encoding declaration
5748 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005749 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005750 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5751 /*
5752 * The XML REC instructs us to stop parsing right here
5753 */
5754 return;
5755 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005756 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5757 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5758 "Missing encoding in text declaration\n");
5759 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005760
5761 SKIP_BLANKS;
5762 if ((RAW == '?') && (NXT(1) == '>')) {
5763 SKIP(2);
5764 } else if (RAW == '>') {
5765 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005766 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005767 NEXT;
5768 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005769 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005770 MOVETO_ENDTAG(CUR_PTR);
5771 NEXT;
5772 }
5773}
5774
5775/**
Owen Taylor3473f882001-02-23 17:55:21 +00005776 * xmlParseExternalSubset:
5777 * @ctxt: an XML parser context
5778 * @ExternalID: the external identifier
5779 * @SystemID: the system identifier (or URL)
5780 *
5781 * parse Markup declarations from an external subset
5782 *
5783 * [30] extSubset ::= textDecl? extSubsetDecl
5784 *
5785 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5786 */
5787void
5788xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5789 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005790 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005791 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005792 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005793 xmlParseTextDecl(ctxt);
5794 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5795 /*
5796 * The XML REC instructs us to stop parsing right here
5797 */
5798 ctxt->instate = XML_PARSER_EOF;
5799 return;
5800 }
5801 }
5802 if (ctxt->myDoc == NULL) {
5803 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5804 }
5805 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5806 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5807
5808 ctxt->instate = XML_PARSER_DTD;
5809 ctxt->external = 1;
5810 while (((RAW == '<') && (NXT(1) == '?')) ||
5811 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005812 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005813 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005814 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005815
5816 GROW;
5817 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5818 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005819 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005820 NEXT;
5821 } else if (RAW == '%') {
5822 xmlParsePEReference(ctxt);
5823 } else
5824 xmlParseMarkupDecl(ctxt);
5825
5826 /*
5827 * Pop-up of finished entities.
5828 */
5829 while ((RAW == 0) && (ctxt->inputNr > 1))
5830 xmlPopInput(ctxt);
5831
Daniel Veillardfdc91562002-07-01 21:52:03 +00005832 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005833 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005834 break;
5835 }
5836 }
5837
5838 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005839 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005840 }
5841
5842}
5843
5844/**
5845 * xmlParseReference:
5846 * @ctxt: an XML parser context
5847 *
5848 * parse and handle entity references in content, depending on the SAX
5849 * interface, this may end-up in a call to character() if this is a
5850 * CharRef, a predefined entity, if there is no reference() callback.
5851 * or if the parser was asked to switch to that mode.
5852 *
5853 * [67] Reference ::= EntityRef | CharRef
5854 */
5855void
5856xmlParseReference(xmlParserCtxtPtr ctxt) {
5857 xmlEntityPtr ent;
5858 xmlChar *val;
5859 if (RAW != '&') return;
5860
5861 if (NXT(1) == '#') {
5862 int i = 0;
5863 xmlChar out[10];
5864 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005865 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005866
5867 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5868 /*
5869 * So we are using non-UTF-8 buffers
5870 * Check that the char fit on 8bits, if not
5871 * generate a CharRef.
5872 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005873 if (value <= 0xFF) {
5874 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005875 out[1] = 0;
5876 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5877 (!ctxt->disableSAX))
5878 ctxt->sax->characters(ctxt->userData, out, 1);
5879 } else {
5880 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005881 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005882 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005883 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005884 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5885 (!ctxt->disableSAX))
5886 ctxt->sax->reference(ctxt->userData, out);
5887 }
5888 } else {
5889 /*
5890 * Just encode the value in UTF-8
5891 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005892 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005893 out[i] = 0;
5894 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5895 (!ctxt->disableSAX))
5896 ctxt->sax->characters(ctxt->userData, out, i);
5897 }
5898 } else {
5899 ent = xmlParseEntityRef(ctxt);
5900 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005901 if (!ctxt->wellFormed)
5902 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 if ((ent->name != NULL) &&
5904 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5905 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005906 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005907
5908
5909 /*
5910 * The first reference to the entity trigger a parsing phase
5911 * where the ent->children is filled with the result from
5912 * the parsing.
5913 */
5914 if (ent->children == NULL) {
5915 xmlChar *value;
5916 value = ent->content;
5917
5918 /*
5919 * Check that this entity is well formed
5920 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005921 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005922 (value[1] == 0) && (value[0] == '<') &&
5923 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5924 /*
5925 * DONE: get definite answer on this !!!
5926 * Lots of entity decls are used to declare a single
5927 * char
5928 * <!ENTITY lt "<">
5929 * Which seems to be valid since
5930 * 2.4: The ampersand character (&) and the left angle
5931 * bracket (<) may appear in their literal form only
5932 * when used ... They are also legal within the literal
5933 * entity value of an internal entity declaration;i
5934 * see "4.3.2 Well-Formed Parsed Entities".
5935 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5936 * Looking at the OASIS test suite and James Clark
5937 * tests, this is broken. However the XML REC uses
5938 * it. Is the XML REC not well-formed ????
5939 * This is a hack to avoid this problem
5940 *
5941 * ANSWER: since lt gt amp .. are already defined,
5942 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005943 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005944 * is lousy but acceptable.
5945 */
5946 list = xmlNewDocText(ctxt->myDoc, value);
5947 if (list != NULL) {
5948 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5949 (ent->children == NULL)) {
5950 ent->children = list;
5951 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005952 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005953 list->parent = (xmlNodePtr) ent;
5954 } else {
5955 xmlFreeNodeList(list);
5956 }
5957 } else if (list != NULL) {
5958 xmlFreeNodeList(list);
5959 }
5960 } else {
5961 /*
5962 * 4.3.2: An internal general parsed entity is well-formed
5963 * if its replacement text matches the production labeled
5964 * content.
5965 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005966
5967 void *user_data;
5968 /*
5969 * This is a bit hackish but this seems the best
5970 * way to make sure both SAX and DOM entity support
5971 * behaves okay.
5972 */
5973 if (ctxt->userData == ctxt)
5974 user_data = NULL;
5975 else
5976 user_data = ctxt->userData;
5977
Owen Taylor3473f882001-02-23 17:55:21 +00005978 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5979 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005980 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5981 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005982 ctxt->depth--;
5983 } else if (ent->etype ==
5984 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5985 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005986 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005987 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005988 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005989 ctxt->depth--;
5990 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005991 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005992 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5993 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005994 }
5995 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005996 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005997 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005998 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005999 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6000 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006001 (ent->children == NULL)) {
6002 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006003 if (ctxt->replaceEntities) {
6004 /*
6005 * Prune it directly in the generated document
6006 * except for single text nodes.
6007 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006008 if (((list->type == XML_TEXT_NODE) &&
6009 (list->next == NULL)) ||
6010 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006011 list->parent = (xmlNodePtr) ent;
6012 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006013 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006014 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006015 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006016 while (list != NULL) {
6017 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006018 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006019 if (list->next == NULL)
6020 ent->last = list;
6021 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006022 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006023 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006024#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006025 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6026 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006027#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006028 }
6029 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006030 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006031 while (list != NULL) {
6032 list->parent = (xmlNodePtr) ent;
6033 if (list->next == NULL)
6034 ent->last = list;
6035 list = list->next;
6036 }
Owen Taylor3473f882001-02-23 17:55:21 +00006037 }
6038 } else {
6039 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006040 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006041 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006042 } else if ((ret != XML_ERR_OK) &&
6043 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006044 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006045 } else if (list != NULL) {
6046 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006047 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006048 }
6049 }
6050 }
6051 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6052 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6053 /*
6054 * Create a node.
6055 */
6056 ctxt->sax->reference(ctxt->userData, ent->name);
6057 return;
6058 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006059 /*
6060 * There is a problem on the handling of _private for entities
6061 * (bug 155816): Should we copy the content of the field from
6062 * the entity (possibly overwriting some value set by the user
6063 * when a copy is created), should we leave it alone, or should
6064 * we try to take care of different situations? The problem
6065 * is exacerbated by the usage of this field by the xmlReader.
6066 * To fix this bug, we look at _private on the created node
6067 * and, if it's NULL, we copy in whatever was in the entity.
6068 * If it's not NULL we leave it alone. This is somewhat of a
6069 * hack - maybe we should have further tests to determine
6070 * what to do.
6071 */
Owen Taylor3473f882001-02-23 17:55:21 +00006072 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6073 /*
6074 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006075 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006076 * In the first occurrence list contains the replacement.
6077 * progressive == 2 means we are operating on the Reader
6078 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006079 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006080 if (((list == NULL) && (ent->owner == 0)) ||
6081 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006082 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006083
6084 /*
6085 * when operating on a reader, the entities definitions
6086 * are always owning the entities subtree.
6087 if (ctxt->parseMode == XML_PARSE_READER)
6088 ent->owner = 1;
6089 */
6090
Daniel Veillard62f313b2001-07-04 19:49:14 +00006091 cur = ent->children;
6092 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006093 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006094 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006095 if (nw->_private == NULL)
6096 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006097 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006098 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006099 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006100 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006101 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006102 if (cur == ent->last) {
6103 /*
6104 * needed to detect some strange empty
6105 * node cases in the reader tests
6106 */
6107 if ((ctxt->parseMode == XML_PARSE_READER) &&
6108 (nw->type == XML_ELEMENT_NODE) &&
6109 (nw->children == NULL))
6110 nw->extra = 1;
6111
Daniel Veillard62f313b2001-07-04 19:49:14 +00006112 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006113 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006114 cur = cur->next;
6115 }
Daniel Veillard81273902003-09-30 00:43:48 +00006116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006117 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006118 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006119#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006120 } else if (list == NULL) {
6121 xmlNodePtr nw = NULL, cur, next, last,
6122 firstChild = NULL;
6123 /*
6124 * Copy the entity child list and make it the new
6125 * entity child list. The goal is to make sure any
6126 * ID or REF referenced will be the one from the
6127 * document content and not the entity copy.
6128 */
6129 cur = ent->children;
6130 ent->children = NULL;
6131 last = ent->last;
6132 ent->last = NULL;
6133 while (cur != NULL) {
6134 next = cur->next;
6135 cur->next = NULL;
6136 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006137 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006138 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006139 if (nw->_private == NULL)
6140 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006141 if (firstChild == NULL){
6142 firstChild = cur;
6143 }
6144 xmlAddChild((xmlNodePtr) ent, nw);
6145 xmlAddChild(ctxt->node, cur);
6146 }
6147 if (cur == last)
6148 break;
6149 cur = next;
6150 }
6151 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006152#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006153 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6154 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006155#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006156 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006157 const xmlChar *nbktext;
6158
Daniel Veillard62f313b2001-07-04 19:49:14 +00006159 /*
6160 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006161 * node with a possible previous text one which
6162 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006163 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006164 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6165 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006166 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006167 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006168 if ((ent->last != ent->children) &&
6169 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006170 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006171 xmlAddChildList(ctxt->node, ent->children);
6172 }
6173
Owen Taylor3473f882001-02-23 17:55:21 +00006174 /*
6175 * This is to avoid a nasty side effect, see
6176 * characters() in SAX.c
6177 */
6178 ctxt->nodemem = 0;
6179 ctxt->nodelen = 0;
6180 return;
6181 } else {
6182 /*
6183 * Probably running in SAX mode
6184 */
6185 xmlParserInputPtr input;
6186
6187 input = xmlNewEntityInputStream(ctxt, ent);
6188 xmlPushInput(ctxt, input);
6189 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006190 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6191 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006192 xmlParseTextDecl(ctxt);
6193 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6194 /*
6195 * The XML REC instructs us to stop parsing right here
6196 */
6197 ctxt->instate = XML_PARSER_EOF;
6198 return;
6199 }
6200 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006201 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6202 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 }
6204 }
6205 return;
6206 }
6207 }
6208 } else {
6209 val = ent->content;
6210 if (val == NULL) return;
6211 /*
6212 * inline the entity.
6213 */
6214 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6215 (!ctxt->disableSAX))
6216 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6217 }
6218 }
6219}
6220
6221/**
6222 * xmlParseEntityRef:
6223 * @ctxt: an XML parser context
6224 *
6225 * parse ENTITY references declarations
6226 *
6227 * [68] EntityRef ::= '&' Name ';'
6228 *
6229 * [ WFC: Entity Declared ]
6230 * In a document without any DTD, a document with only an internal DTD
6231 * subset which contains no parameter entity references, or a document
6232 * with "standalone='yes'", the Name given in the entity reference
6233 * must match that in an entity declaration, except that well-formed
6234 * documents need not declare any of the following entities: amp, lt,
6235 * gt, apos, quot. The declaration of a parameter entity must precede
6236 * any reference to it. Similarly, the declaration of a general entity
6237 * must precede any reference to it which appears in a default value in an
6238 * attribute-list declaration. Note that if entities are declared in the
6239 * external subset or in external parameter entities, a non-validating
6240 * processor is not obligated to read and process their declarations;
6241 * for such documents, the rule that an entity must be declared is a
6242 * well-formedness constraint only if standalone='yes'.
6243 *
6244 * [ WFC: Parsed Entity ]
6245 * An entity reference must not contain the name of an unparsed entity
6246 *
6247 * Returns the xmlEntityPtr if found, or NULL otherwise.
6248 */
6249xmlEntityPtr
6250xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006251 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006252 xmlEntityPtr ent = NULL;
6253
6254 GROW;
6255
6256 if (RAW == '&') {
6257 NEXT;
6258 name = xmlParseName(ctxt);
6259 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006260 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6261 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006262 } else {
6263 if (RAW == ';') {
6264 NEXT;
6265 /*
6266 * Ask first SAX for entity resolution, otherwise try the
6267 * predefined set.
6268 */
6269 if (ctxt->sax != NULL) {
6270 if (ctxt->sax->getEntity != NULL)
6271 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006272 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006273 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006274 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6275 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006276 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006277 }
Owen Taylor3473f882001-02-23 17:55:21 +00006278 }
6279 /*
6280 * [ WFC: Entity Declared ]
6281 * In a document without any DTD, a document with only an
6282 * internal DTD subset which contains no parameter entity
6283 * references, or a document with "standalone='yes'", the
6284 * Name given in the entity reference must match that in an
6285 * entity declaration, except that well-formed documents
6286 * need not declare any of the following entities: amp, lt,
6287 * gt, apos, quot.
6288 * The declaration of a parameter entity must precede any
6289 * reference to it.
6290 * Similarly, the declaration of a general entity must
6291 * precede any reference to it which appears in a default
6292 * value in an attribute-list declaration. Note that if
6293 * entities are declared in the external subset or in
6294 * external parameter entities, a non-validating processor
6295 * is not obligated to read and process their declarations;
6296 * for such documents, the rule that an entity must be
6297 * declared is a well-formedness constraint only if
6298 * standalone='yes'.
6299 */
6300 if (ent == NULL) {
6301 if ((ctxt->standalone == 1) ||
6302 ((ctxt->hasExternalSubset == 0) &&
6303 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006304 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006305 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006307 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006308 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006309 if ((ctxt->inSubset == 0) &&
6310 (ctxt->sax != NULL) &&
6311 (ctxt->sax->reference != NULL)) {
6312 ctxt->sax->reference(ctxt, name);
6313 }
Owen Taylor3473f882001-02-23 17:55:21 +00006314 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006315 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006316 }
6317
6318 /*
6319 * [ WFC: Parsed Entity ]
6320 * An entity reference must not contain the name of an
6321 * unparsed entity
6322 */
6323 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006324 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006325 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006326 }
6327
6328 /*
6329 * [ WFC: No External Entity References ]
6330 * Attribute values cannot contain direct or indirect
6331 * entity references to external entities.
6332 */
6333 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6334 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006335 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6336 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338 /*
6339 * [ WFC: No < in Attribute Values ]
6340 * The replacement text of any entity referred to directly or
6341 * indirectly in an attribute value (other than "&lt;") must
6342 * not contain a <.
6343 */
6344 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6345 (ent != NULL) &&
6346 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6347 (ent->content != NULL) &&
6348 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006349 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006350 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 }
6352
6353 /*
6354 * Internal check, no parameter entities here ...
6355 */
6356 else {
6357 switch (ent->etype) {
6358 case XML_INTERNAL_PARAMETER_ENTITY:
6359 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006360 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6361 "Attempt to reference the parameter entity '%s'\n",
6362 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006363 break;
6364 default:
6365 break;
6366 }
6367 }
6368
6369 /*
6370 * [ WFC: No Recursion ]
6371 * A parsed entity must not contain a recursive reference
6372 * to itself, either directly or indirectly.
6373 * Done somewhere else
6374 */
6375
6376 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006377 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006378 }
Owen Taylor3473f882001-02-23 17:55:21 +00006379 }
6380 }
6381 return(ent);
6382}
6383
6384/**
6385 * xmlParseStringEntityRef:
6386 * @ctxt: an XML parser context
6387 * @str: a pointer to an index in the string
6388 *
6389 * parse ENTITY references declarations, but this version parses it from
6390 * a string value.
6391 *
6392 * [68] EntityRef ::= '&' Name ';'
6393 *
6394 * [ WFC: Entity Declared ]
6395 * In a document without any DTD, a document with only an internal DTD
6396 * subset which contains no parameter entity references, or a document
6397 * with "standalone='yes'", the Name given in the entity reference
6398 * must match that in an entity declaration, except that well-formed
6399 * documents need not declare any of the following entities: amp, lt,
6400 * gt, apos, quot. The declaration of a parameter entity must precede
6401 * any reference to it. Similarly, the declaration of a general entity
6402 * must precede any reference to it which appears in a default value in an
6403 * attribute-list declaration. Note that if entities are declared in the
6404 * external subset or in external parameter entities, a non-validating
6405 * processor is not obligated to read and process their declarations;
6406 * for such documents, the rule that an entity must be declared is a
6407 * well-formedness constraint only if standalone='yes'.
6408 *
6409 * [ WFC: Parsed Entity ]
6410 * An entity reference must not contain the name of an unparsed entity
6411 *
6412 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6413 * is updated to the current location in the string.
6414 */
6415xmlEntityPtr
6416xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6417 xmlChar *name;
6418 const xmlChar *ptr;
6419 xmlChar cur;
6420 xmlEntityPtr ent = NULL;
6421
6422 if ((str == NULL) || (*str == NULL))
6423 return(NULL);
6424 ptr = *str;
6425 cur = *ptr;
6426 if (cur == '&') {
6427 ptr++;
6428 cur = *ptr;
6429 name = xmlParseStringName(ctxt, &ptr);
6430 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006431 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6432 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006433 } else {
6434 if (*ptr == ';') {
6435 ptr++;
6436 /*
6437 * Ask first SAX for entity resolution, otherwise try the
6438 * predefined set.
6439 */
6440 if (ctxt->sax != NULL) {
6441 if (ctxt->sax->getEntity != NULL)
6442 ent = ctxt->sax->getEntity(ctxt->userData, name);
6443 if (ent == NULL)
6444 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006445 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006446 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006447 }
Owen Taylor3473f882001-02-23 17:55:21 +00006448 }
6449 /*
6450 * [ WFC: Entity Declared ]
6451 * In a document without any DTD, a document with only an
6452 * internal DTD subset which contains no parameter entity
6453 * references, or a document with "standalone='yes'", the
6454 * Name given in the entity reference must match that in an
6455 * entity declaration, except that well-formed documents
6456 * need not declare any of the following entities: amp, lt,
6457 * gt, apos, quot.
6458 * The declaration of a parameter entity must precede any
6459 * reference to it.
6460 * Similarly, the declaration of a general entity must
6461 * precede any reference to it which appears in a default
6462 * value in an attribute-list declaration. Note that if
6463 * entities are declared in the external subset or in
6464 * external parameter entities, a non-validating processor
6465 * is not obligated to read and process their declarations;
6466 * for such documents, the rule that an entity must be
6467 * declared is a well-formedness constraint only if
6468 * standalone='yes'.
6469 */
6470 if (ent == NULL) {
6471 if ((ctxt->standalone == 1) ||
6472 ((ctxt->hasExternalSubset == 0) &&
6473 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006474 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006475 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006476 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006477 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006478 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006479 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006481 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006482 }
6483
6484 /*
6485 * [ WFC: Parsed Entity ]
6486 * An entity reference must not contain the name of an
6487 * unparsed entity
6488 */
6489 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006490 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006491 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 }
6493
6494 /*
6495 * [ WFC: No External Entity References ]
6496 * Attribute values cannot contain direct or indirect
6497 * entity references to external entities.
6498 */
6499 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6500 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006501 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006502 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006503 }
6504 /*
6505 * [ WFC: No < in Attribute Values ]
6506 * The replacement text of any entity referred to directly or
6507 * indirectly in an attribute value (other than "&lt;") must
6508 * not contain a <.
6509 */
6510 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6511 (ent != NULL) &&
6512 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6513 (ent->content != NULL) &&
6514 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006515 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6516 "'<' in entity '%s' is not allowed in attributes values\n",
6517 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006518 }
6519
6520 /*
6521 * Internal check, no parameter entities here ...
6522 */
6523 else {
6524 switch (ent->etype) {
6525 case XML_INTERNAL_PARAMETER_ENTITY:
6526 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006527 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6528 "Attempt to reference the parameter entity '%s'\n",
6529 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006530 break;
6531 default:
6532 break;
6533 }
6534 }
6535
6536 /*
6537 * [ WFC: No Recursion ]
6538 * A parsed entity must not contain a recursive reference
6539 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006540 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006541 */
6542
6543 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006544 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006545 }
6546 xmlFree(name);
6547 }
6548 }
6549 *str = ptr;
6550 return(ent);
6551}
6552
6553/**
6554 * xmlParsePEReference:
6555 * @ctxt: an XML parser context
6556 *
6557 * parse PEReference declarations
6558 * The entity content is handled directly by pushing it's content as
6559 * a new input stream.
6560 *
6561 * [69] PEReference ::= '%' Name ';'
6562 *
6563 * [ WFC: No Recursion ]
6564 * A parsed entity must not contain a recursive
6565 * reference to itself, either directly or indirectly.
6566 *
6567 * [ WFC: Entity Declared ]
6568 * In a document without any DTD, a document with only an internal DTD
6569 * subset which contains no parameter entity references, or a document
6570 * with "standalone='yes'", ... ... The declaration of a parameter
6571 * entity must precede any reference to it...
6572 *
6573 * [ VC: Entity Declared ]
6574 * In a document with an external subset or external parameter entities
6575 * with "standalone='no'", ... ... The declaration of a parameter entity
6576 * must precede any reference to it...
6577 *
6578 * [ WFC: In DTD ]
6579 * Parameter-entity references may only appear in the DTD.
6580 * NOTE: misleading but this is handled.
6581 */
6582void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006583xmlParsePEReference(xmlParserCtxtPtr ctxt)
6584{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006585 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006586 xmlEntityPtr entity = NULL;
6587 xmlParserInputPtr input;
6588
6589 if (RAW == '%') {
6590 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006591 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006592 if (name == NULL) {
6593 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6594 "xmlParsePEReference: no name\n");
6595 } else {
6596 if (RAW == ';') {
6597 NEXT;
6598 if ((ctxt->sax != NULL) &&
6599 (ctxt->sax->getParameterEntity != NULL))
6600 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6601 name);
6602 if (entity == NULL) {
6603 /*
6604 * [ WFC: Entity Declared ]
6605 * In a document without any DTD, a document with only an
6606 * internal DTD subset which contains no parameter entity
6607 * references, or a document with "standalone='yes'", ...
6608 * ... The declaration of a parameter entity must precede
6609 * any reference to it...
6610 */
6611 if ((ctxt->standalone == 1) ||
6612 ((ctxt->hasExternalSubset == 0) &&
6613 (ctxt->hasPErefs == 0))) {
6614 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6615 "PEReference: %%%s; not found\n",
6616 name);
6617 } else {
6618 /*
6619 * [ VC: Entity Declared ]
6620 * In a document with an external subset or external
6621 * parameter entities with "standalone='no'", ...
6622 * ... The declaration of a parameter entity must
6623 * precede any reference to it...
6624 */
6625 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6626 "PEReference: %%%s; not found\n",
6627 name, NULL);
6628 ctxt->valid = 0;
6629 }
6630 } else {
6631 /*
6632 * Internal checking in case the entity quest barfed
6633 */
6634 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6635 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6636 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6637 "Internal: %%%s; is not a parameter entity\n",
6638 name, NULL);
6639 } else if (ctxt->input->free != deallocblankswrapper) {
6640 input =
6641 xmlNewBlanksWrapperInputStream(ctxt, entity);
6642 xmlPushInput(ctxt, input);
6643 } else {
6644 /*
6645 * TODO !!!
6646 * handle the extra spaces added before and after
6647 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6648 */
6649 input = xmlNewEntityInputStream(ctxt, entity);
6650 xmlPushInput(ctxt, input);
6651 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006652 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006653 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006654 xmlParseTextDecl(ctxt);
6655 if (ctxt->errNo ==
6656 XML_ERR_UNSUPPORTED_ENCODING) {
6657 /*
6658 * The XML REC instructs us to stop parsing
6659 * right here
6660 */
6661 ctxt->instate = XML_PARSER_EOF;
6662 return;
6663 }
6664 }
6665 }
6666 }
6667 ctxt->hasPErefs = 1;
6668 } else {
6669 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6670 }
6671 }
Owen Taylor3473f882001-02-23 17:55:21 +00006672 }
6673}
6674
6675/**
6676 * xmlParseStringPEReference:
6677 * @ctxt: an XML parser context
6678 * @str: a pointer to an index in the string
6679 *
6680 * parse PEReference declarations
6681 *
6682 * [69] PEReference ::= '%' Name ';'
6683 *
6684 * [ WFC: No Recursion ]
6685 * A parsed entity must not contain a recursive
6686 * reference to itself, either directly or indirectly.
6687 *
6688 * [ WFC: Entity Declared ]
6689 * In a document without any DTD, a document with only an internal DTD
6690 * subset which contains no parameter entity references, or a document
6691 * with "standalone='yes'", ... ... The declaration of a parameter
6692 * entity must precede any reference to it...
6693 *
6694 * [ VC: Entity Declared ]
6695 * In a document with an external subset or external parameter entities
6696 * with "standalone='no'", ... ... The declaration of a parameter entity
6697 * must precede any reference to it...
6698 *
6699 * [ WFC: In DTD ]
6700 * Parameter-entity references may only appear in the DTD.
6701 * NOTE: misleading but this is handled.
6702 *
6703 * Returns the string of the entity content.
6704 * str is updated to the current value of the index
6705 */
6706xmlEntityPtr
6707xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6708 const xmlChar *ptr;
6709 xmlChar cur;
6710 xmlChar *name;
6711 xmlEntityPtr entity = NULL;
6712
6713 if ((str == NULL) || (*str == NULL)) return(NULL);
6714 ptr = *str;
6715 cur = *ptr;
6716 if (cur == '%') {
6717 ptr++;
6718 cur = *ptr;
6719 name = xmlParseStringName(ctxt, &ptr);
6720 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006721 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6722 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006723 } else {
6724 cur = *ptr;
6725 if (cur == ';') {
6726 ptr++;
6727 cur = *ptr;
6728 if ((ctxt->sax != NULL) &&
6729 (ctxt->sax->getParameterEntity != NULL))
6730 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6731 name);
6732 if (entity == NULL) {
6733 /*
6734 * [ WFC: Entity Declared ]
6735 * In a document without any DTD, a document with only an
6736 * internal DTD subset which contains no parameter entity
6737 * references, or a document with "standalone='yes'", ...
6738 * ... The declaration of a parameter entity must precede
6739 * any reference to it...
6740 */
6741 if ((ctxt->standalone == 1) ||
6742 ((ctxt->hasExternalSubset == 0) &&
6743 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006744 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006745 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006746 } else {
6747 /*
6748 * [ VC: Entity Declared ]
6749 * In a document with an external subset or external
6750 * parameter entities with "standalone='no'", ...
6751 * ... The declaration of a parameter entity must
6752 * precede any reference to it...
6753 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006754 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6755 "PEReference: %%%s; not found\n",
6756 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006757 ctxt->valid = 0;
6758 }
6759 } else {
6760 /*
6761 * Internal checking in case the entity quest barfed
6762 */
6763 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6764 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006765 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6766 "%%%s; is not a parameter entity\n",
6767 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006768 }
6769 }
6770 ctxt->hasPErefs = 1;
6771 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006772 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006773 }
6774 xmlFree(name);
6775 }
6776 }
6777 *str = ptr;
6778 return(entity);
6779}
6780
6781/**
6782 * xmlParseDocTypeDecl:
6783 * @ctxt: an XML parser context
6784 *
6785 * parse a DOCTYPE declaration
6786 *
6787 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6788 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6789 *
6790 * [ VC: Root Element Type ]
6791 * The Name in the document type declaration must match the element
6792 * type of the root element.
6793 */
6794
6795void
6796xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006797 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 xmlChar *ExternalID = NULL;
6799 xmlChar *URI = NULL;
6800
6801 /*
6802 * We know that '<!DOCTYPE' has been detected.
6803 */
6804 SKIP(9);
6805
6806 SKIP_BLANKS;
6807
6808 /*
6809 * Parse the DOCTYPE name.
6810 */
6811 name = xmlParseName(ctxt);
6812 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006813 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6814 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006815 }
6816 ctxt->intSubName = name;
6817
6818 SKIP_BLANKS;
6819
6820 /*
6821 * Check for SystemID and ExternalID
6822 */
6823 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6824
6825 if ((URI != NULL) || (ExternalID != NULL)) {
6826 ctxt->hasExternalSubset = 1;
6827 }
6828 ctxt->extSubURI = URI;
6829 ctxt->extSubSystem = ExternalID;
6830
6831 SKIP_BLANKS;
6832
6833 /*
6834 * Create and update the internal subset.
6835 */
6836 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6837 (!ctxt->disableSAX))
6838 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6839
6840 /*
6841 * Is there any internal subset declarations ?
6842 * they are handled separately in xmlParseInternalSubset()
6843 */
6844 if (RAW == '[')
6845 return;
6846
6847 /*
6848 * We should be at the end of the DOCTYPE declaration.
6849 */
6850 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006851 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006852 }
6853 NEXT;
6854}
6855
6856/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006857 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006858 * @ctxt: an XML parser context
6859 *
6860 * parse the internal subset declaration
6861 *
6862 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6863 */
6864
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006865static void
Owen Taylor3473f882001-02-23 17:55:21 +00006866xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6867 /*
6868 * Is there any DTD definition ?
6869 */
6870 if (RAW == '[') {
6871 ctxt->instate = XML_PARSER_DTD;
6872 NEXT;
6873 /*
6874 * Parse the succession of Markup declarations and
6875 * PEReferences.
6876 * Subsequence (markupdecl | PEReference | S)*
6877 */
6878 while (RAW != ']') {
6879 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006880 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006881
6882 SKIP_BLANKS;
6883 xmlParseMarkupDecl(ctxt);
6884 xmlParsePEReference(ctxt);
6885
6886 /*
6887 * Pop-up of finished entities.
6888 */
6889 while ((RAW == 0) && (ctxt->inputNr > 1))
6890 xmlPopInput(ctxt);
6891
6892 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006893 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006894 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006895 break;
6896 }
6897 }
6898 if (RAW == ']') {
6899 NEXT;
6900 SKIP_BLANKS;
6901 }
6902 }
6903
6904 /*
6905 * We should be at the end of the DOCTYPE declaration.
6906 */
6907 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006908 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006909 }
6910 NEXT;
6911}
6912
Daniel Veillard81273902003-09-30 00:43:48 +00006913#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006914/**
6915 * xmlParseAttribute:
6916 * @ctxt: an XML parser context
6917 * @value: a xmlChar ** used to store the value of the attribute
6918 *
6919 * parse an attribute
6920 *
6921 * [41] Attribute ::= Name Eq AttValue
6922 *
6923 * [ WFC: No External Entity References ]
6924 * Attribute values cannot contain direct or indirect entity references
6925 * to external entities.
6926 *
6927 * [ WFC: No < in Attribute Values ]
6928 * The replacement text of any entity referred to directly or indirectly in
6929 * an attribute value (other than "&lt;") must not contain a <.
6930 *
6931 * [ VC: Attribute Value Type ]
6932 * The attribute must have been declared; the value must be of the type
6933 * declared for it.
6934 *
6935 * [25] Eq ::= S? '=' S?
6936 *
6937 * With namespace:
6938 *
6939 * [NS 11] Attribute ::= QName Eq AttValue
6940 *
6941 * Also the case QName == xmlns:??? is handled independently as a namespace
6942 * definition.
6943 *
6944 * Returns the attribute name, and the value in *value.
6945 */
6946
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006947const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006948xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006949 const xmlChar *name;
6950 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006951
6952 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006953 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006954 name = xmlParseName(ctxt);
6955 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006956 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006957 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006958 return(NULL);
6959 }
6960
6961 /*
6962 * read the value
6963 */
6964 SKIP_BLANKS;
6965 if (RAW == '=') {
6966 NEXT;
6967 SKIP_BLANKS;
6968 val = xmlParseAttValue(ctxt);
6969 ctxt->instate = XML_PARSER_CONTENT;
6970 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006971 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006972 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 return(NULL);
6974 }
6975
6976 /*
6977 * Check that xml:lang conforms to the specification
6978 * No more registered as an error, just generate a warning now
6979 * since this was deprecated in XML second edition
6980 */
6981 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6982 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006983 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6984 "Malformed value for xml:lang : %s\n",
6985 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006986 }
6987 }
6988
6989 /*
6990 * Check that xml:space conforms to the specification
6991 */
6992 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6993 if (xmlStrEqual(val, BAD_CAST "default"))
6994 *(ctxt->space) = 0;
6995 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6996 *(ctxt->space) = 1;
6997 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006998 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006999"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007000 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 }
7002 }
7003
7004 *value = val;
7005 return(name);
7006}
7007
7008/**
7009 * xmlParseStartTag:
7010 * @ctxt: an XML parser context
7011 *
7012 * parse a start of tag either for rule element or
7013 * EmptyElement. In both case we don't parse the tag closing chars.
7014 *
7015 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7016 *
7017 * [ WFC: Unique Att Spec ]
7018 * No attribute name may appear more than once in the same start-tag or
7019 * empty-element tag.
7020 *
7021 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7022 *
7023 * [ WFC: Unique Att Spec ]
7024 * No attribute name may appear more than once in the same start-tag or
7025 * empty-element tag.
7026 *
7027 * With namespace:
7028 *
7029 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7030 *
7031 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7032 *
7033 * Returns the element name parsed
7034 */
7035
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007036const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007037xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007038 const xmlChar *name;
7039 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007040 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007041 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007042 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007043 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007044 int i;
7045
7046 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007047 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007048
7049 name = xmlParseName(ctxt);
7050 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007051 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007052 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007053 return(NULL);
7054 }
7055
7056 /*
7057 * Now parse the attributes, it ends up with the ending
7058 *
7059 * (S Attribute)* S?
7060 */
7061 SKIP_BLANKS;
7062 GROW;
7063
Daniel Veillard21a0f912001-02-25 19:54:14 +00007064 while ((RAW != '>') &&
7065 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007066 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007067 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007068 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007069
7070 attname = xmlParseAttribute(ctxt, &attvalue);
7071 if ((attname != NULL) && (attvalue != NULL)) {
7072 /*
7073 * [ WFC: Unique Att Spec ]
7074 * No attribute name may appear more than once in the same
7075 * start-tag or empty-element tag.
7076 */
7077 for (i = 0; i < nbatts;i += 2) {
7078 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007079 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007080 xmlFree(attvalue);
7081 goto failed;
7082 }
7083 }
Owen Taylor3473f882001-02-23 17:55:21 +00007084 /*
7085 * Add the pair to atts
7086 */
7087 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007088 maxatts = 22; /* allow for 10 attrs by default */
7089 atts = (const xmlChar **)
7090 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007091 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007092 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007093 if (attvalue != NULL)
7094 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007095 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007096 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007097 ctxt->atts = atts;
7098 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007099 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007100 const xmlChar **n;
7101
Owen Taylor3473f882001-02-23 17:55:21 +00007102 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007103 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007104 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007105 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007106 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007107 if (attvalue != NULL)
7108 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007109 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007110 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007111 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007112 ctxt->atts = atts;
7113 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007114 }
7115 atts[nbatts++] = attname;
7116 atts[nbatts++] = attvalue;
7117 atts[nbatts] = NULL;
7118 atts[nbatts + 1] = NULL;
7119 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007120 if (attvalue != NULL)
7121 xmlFree(attvalue);
7122 }
7123
7124failed:
7125
Daniel Veillard3772de32002-12-17 10:31:45 +00007126 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007127 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7128 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007129 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007130 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7131 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007132 }
7133 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007134 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7135 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7137 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007138 break;
7139 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007140 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007141 GROW;
7142 }
7143
7144 /*
7145 * SAX: Start of Element !
7146 */
7147 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007148 (!ctxt->disableSAX)) {
7149 if (nbatts > 0)
7150 ctxt->sax->startElement(ctxt->userData, name, atts);
7151 else
7152 ctxt->sax->startElement(ctxt->userData, name, NULL);
7153 }
Owen Taylor3473f882001-02-23 17:55:21 +00007154
7155 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007156 /* Free only the content strings */
7157 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007158 if (atts[i] != NULL)
7159 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007160 }
7161 return(name);
7162}
7163
7164/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007165 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007166 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007167 * @line: line of the start tag
7168 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007169 *
7170 * parse an end of tag
7171 *
7172 * [42] ETag ::= '</' Name S? '>'
7173 *
7174 * With namespace
7175 *
7176 * [NS 9] ETag ::= '</' QName S? '>'
7177 */
7178
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007179static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007180xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007181 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007182
7183 GROW;
7184 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007185 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007186 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007187 return;
7188 }
7189 SKIP(2);
7190
Daniel Veillard46de64e2002-05-29 08:21:33 +00007191 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007192
7193 /*
7194 * We should definitely be at the ending "S? '>'" part
7195 */
7196 GROW;
7197 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007198 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007199 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007200 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007201 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007202
7203 /*
7204 * [ WFC: Element Type Match ]
7205 * The Name in an element's end-tag must match the element type in the
7206 * start-tag.
7207 *
7208 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007209 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007210 if (name == NULL) name = BAD_CAST "unparseable";
7211 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007212 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007213 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007214 }
7215
7216 /*
7217 * SAX: End of Tag
7218 */
7219 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7220 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007221 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007222
Daniel Veillarde57ec792003-09-10 10:50:59 +00007223 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007224 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007225 return;
7226}
7227
7228/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007229 * xmlParseEndTag:
7230 * @ctxt: an XML parser context
7231 *
7232 * parse an end of tag
7233 *
7234 * [42] ETag ::= '</' Name S? '>'
7235 *
7236 * With namespace
7237 *
7238 * [NS 9] ETag ::= '</' QName S? '>'
7239 */
7240
7241void
7242xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007243 xmlParseEndTag1(ctxt, 0);
7244}
Daniel Veillard81273902003-09-30 00:43:48 +00007245#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007246
7247/************************************************************************
7248 * *
7249 * SAX 2 specific operations *
7250 * *
7251 ************************************************************************/
7252
7253static const xmlChar *
7254xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7255 int len = 0, l;
7256 int c;
7257 int count = 0;
7258
7259 /*
7260 * Handler for more complex cases
7261 */
7262 GROW;
7263 c = CUR_CHAR(l);
7264 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007265 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266 return(NULL);
7267 }
7268
7269 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007270 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007271 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007272 (IS_COMBINING(c)) ||
7273 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274 if (count++ > 100) {
7275 count = 0;
7276 GROW;
7277 }
7278 len += l;
7279 NEXTL(l);
7280 c = CUR_CHAR(l);
7281 }
7282 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7283}
7284
7285/*
7286 * xmlGetNamespace:
7287 * @ctxt: an XML parser context
7288 * @prefix: the prefix to lookup
7289 *
7290 * Lookup the namespace name for the @prefix (which ca be NULL)
7291 * The prefix must come from the @ctxt->dict dictionnary
7292 *
7293 * Returns the namespace name or NULL if not bound
7294 */
7295static const xmlChar *
7296xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7297 int i;
7298
Daniel Veillarde57ec792003-09-10 10:50:59 +00007299 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007300 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007301 if (ctxt->nsTab[i] == prefix) {
7302 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7303 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007304 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007305 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007306 return(NULL);
7307}
7308
7309/**
7310 * xmlParseNCName:
7311 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007312 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007313 *
7314 * parse an XML name.
7315 *
7316 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7317 * CombiningChar | Extender
7318 *
7319 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7320 *
7321 * Returns the Name parsed or NULL
7322 */
7323
7324static const xmlChar *
7325xmlParseNCName(xmlParserCtxtPtr ctxt) {
7326 const xmlChar *in;
7327 const xmlChar *ret;
7328 int count = 0;
7329
7330 /*
7331 * Accelerator for simple ASCII names
7332 */
7333 in = ctxt->input->cur;
7334 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7335 ((*in >= 0x41) && (*in <= 0x5A)) ||
7336 (*in == '_')) {
7337 in++;
7338 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7339 ((*in >= 0x41) && (*in <= 0x5A)) ||
7340 ((*in >= 0x30) && (*in <= 0x39)) ||
7341 (*in == '_') || (*in == '-') ||
7342 (*in == '.'))
7343 in++;
7344 if ((*in > 0) && (*in < 0x80)) {
7345 count = in - ctxt->input->cur;
7346 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7347 ctxt->input->cur = in;
7348 ctxt->nbChars += count;
7349 ctxt->input->col += count;
7350 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007351 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007352 }
7353 return(ret);
7354 }
7355 }
7356 return(xmlParseNCNameComplex(ctxt));
7357}
7358
7359/**
7360 * xmlParseQName:
7361 * @ctxt: an XML parser context
7362 * @prefix: pointer to store the prefix part
7363 *
7364 * parse an XML Namespace QName
7365 *
7366 * [6] QName ::= (Prefix ':')? LocalPart
7367 * [7] Prefix ::= NCName
7368 * [8] LocalPart ::= NCName
7369 *
7370 * Returns the Name parsed or NULL
7371 */
7372
7373static const xmlChar *
7374xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7375 const xmlChar *l, *p;
7376
7377 GROW;
7378
7379 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007380 if (l == NULL) {
7381 if (CUR == ':') {
7382 l = xmlParseName(ctxt);
7383 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007384 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7385 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007386 *prefix = NULL;
7387 return(l);
7388 }
7389 }
7390 return(NULL);
7391 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007392 if (CUR == ':') {
7393 NEXT;
7394 p = l;
7395 l = xmlParseNCName(ctxt);
7396 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007397 xmlChar *tmp;
7398
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007399 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7400 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007401 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7402 p = xmlDictLookup(ctxt->dict, tmp, -1);
7403 if (tmp != NULL) xmlFree(tmp);
7404 *prefix = NULL;
7405 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007406 }
7407 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007408 xmlChar *tmp;
7409
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007410 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7411 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007412 NEXT;
7413 tmp = (xmlChar *) xmlParseName(ctxt);
7414 if (tmp != NULL) {
7415 tmp = xmlBuildQName(tmp, l, NULL, 0);
7416 l = xmlDictLookup(ctxt->dict, tmp, -1);
7417 if (tmp != NULL) xmlFree(tmp);
7418 *prefix = p;
7419 return(l);
7420 }
7421 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7422 l = xmlDictLookup(ctxt->dict, tmp, -1);
7423 if (tmp != NULL) xmlFree(tmp);
7424 *prefix = p;
7425 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 }
7427 *prefix = p;
7428 } else
7429 *prefix = NULL;
7430 return(l);
7431}
7432
7433/**
7434 * xmlParseQNameAndCompare:
7435 * @ctxt: an XML parser context
7436 * @name: the localname
7437 * @prefix: the prefix, if any.
7438 *
7439 * parse an XML name and compares for match
7440 * (specialized for endtag parsing)
7441 *
7442 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7443 * and the name for mismatch
7444 */
7445
7446static const xmlChar *
7447xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7448 xmlChar const *prefix) {
7449 const xmlChar *cmp = name;
7450 const xmlChar *in;
7451 const xmlChar *ret;
7452 const xmlChar *prefix2;
7453
7454 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7455
7456 GROW;
7457 in = ctxt->input->cur;
7458
7459 cmp = prefix;
7460 while (*in != 0 && *in == *cmp) {
7461 ++in;
7462 ++cmp;
7463 }
7464 if ((*cmp == 0) && (*in == ':')) {
7465 in++;
7466 cmp = name;
7467 while (*in != 0 && *in == *cmp) {
7468 ++in;
7469 ++cmp;
7470 }
William M. Brack76e95df2003-10-18 16:20:14 +00007471 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472 /* success */
7473 ctxt->input->cur = in;
7474 return((const xmlChar*) 1);
7475 }
7476 }
7477 /*
7478 * all strings coms from the dictionary, equality can be done directly
7479 */
7480 ret = xmlParseQName (ctxt, &prefix2);
7481 if ((ret == name) && (prefix == prefix2))
7482 return((const xmlChar*) 1);
7483 return ret;
7484}
7485
7486/**
7487 * xmlParseAttValueInternal:
7488 * @ctxt: an XML parser context
7489 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007490 * @alloc: whether the attribute was reallocated as a new string
7491 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007492 *
7493 * parse a value for an attribute.
7494 * NOTE: if no normalization is needed, the routine will return pointers
7495 * directly from the data buffer.
7496 *
7497 * 3.3.3 Attribute-Value Normalization:
7498 * Before the value of an attribute is passed to the application or
7499 * checked for validity, the XML processor must normalize it as follows:
7500 * - a character reference is processed by appending the referenced
7501 * character to the attribute value
7502 * - an entity reference is processed by recursively processing the
7503 * replacement text of the entity
7504 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7505 * appending #x20 to the normalized value, except that only a single
7506 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7507 * parsed entity or the literal entity value of an internal parsed entity
7508 * - other characters are processed by appending them to the normalized value
7509 * If the declared value is not CDATA, then the XML processor must further
7510 * process the normalized attribute value by discarding any leading and
7511 * trailing space (#x20) characters, and by replacing sequences of space
7512 * (#x20) characters by a single space (#x20) character.
7513 * All attributes for which no declaration has been read should be treated
7514 * by a non-validating parser as if declared CDATA.
7515 *
7516 * Returns the AttValue parsed or NULL. The value has to be freed by the
7517 * caller if it was copied, this can be detected by val[*len] == 0.
7518 */
7519
7520static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007521xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7522 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007523{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007525 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007526 xmlChar *ret = NULL;
7527
7528 GROW;
7529 in = (xmlChar *) CUR_PTR;
7530 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007531 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007532 return (NULL);
7533 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007534 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007535
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007536 /*
7537 * try to handle in this routine the most common case where no
7538 * allocation of a new string is required and where content is
7539 * pure ASCII.
7540 */
7541 limit = *in++;
7542 end = ctxt->input->end;
7543 start = in;
7544 if (in >= end) {
7545 const xmlChar *oldbase = ctxt->input->base;
7546 GROW;
7547 if (oldbase != ctxt->input->base) {
7548 long delta = ctxt->input->base - oldbase;
7549 start = start + delta;
7550 in = in + delta;
7551 }
7552 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007553 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007554 if (normalize) {
7555 /*
7556 * Skip any leading spaces
7557 */
7558 while ((in < end) && (*in != limit) &&
7559 ((*in == 0x20) || (*in == 0x9) ||
7560 (*in == 0xA) || (*in == 0xD))) {
7561 in++;
7562 start = in;
7563 if (in >= end) {
7564 const xmlChar *oldbase = ctxt->input->base;
7565 GROW;
7566 if (oldbase != ctxt->input->base) {
7567 long delta = ctxt->input->base - oldbase;
7568 start = start + delta;
7569 in = in + delta;
7570 }
7571 end = ctxt->input->end;
7572 }
7573 }
7574 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7575 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7576 if ((*in++ == 0x20) && (*in == 0x20)) break;
7577 if (in >= end) {
7578 const xmlChar *oldbase = ctxt->input->base;
7579 GROW;
7580 if (oldbase != ctxt->input->base) {
7581 long delta = ctxt->input->base - oldbase;
7582 start = start + delta;
7583 in = in + delta;
7584 }
7585 end = ctxt->input->end;
7586 }
7587 }
7588 last = in;
7589 /*
7590 * skip the trailing blanks
7591 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007592 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007593 while ((in < end) && (*in != limit) &&
7594 ((*in == 0x20) || (*in == 0x9) ||
7595 (*in == 0xA) || (*in == 0xD))) {
7596 in++;
7597 if (in >= end) {
7598 const xmlChar *oldbase = ctxt->input->base;
7599 GROW;
7600 if (oldbase != ctxt->input->base) {
7601 long delta = ctxt->input->base - oldbase;
7602 start = start + delta;
7603 in = in + delta;
7604 last = last + delta;
7605 }
7606 end = ctxt->input->end;
7607 }
7608 }
7609 if (*in != limit) goto need_complex;
7610 } else {
7611 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7612 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7613 in++;
7614 if (in >= end) {
7615 const xmlChar *oldbase = ctxt->input->base;
7616 GROW;
7617 if (oldbase != ctxt->input->base) {
7618 long delta = ctxt->input->base - oldbase;
7619 start = start + delta;
7620 in = in + delta;
7621 }
7622 end = ctxt->input->end;
7623 }
7624 }
7625 last = in;
7626 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007628 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007629 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007630 *len = last - start;
7631 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007632 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007633 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007634 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007635 }
7636 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007637 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007639need_complex:
7640 if (alloc) *alloc = 1;
7641 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007642}
7643
7644/**
7645 * xmlParseAttribute2:
7646 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007647 * @pref: the element prefix
7648 * @elem: the element name
7649 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007650 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007651 * @len: an int * to save the length of the attribute
7652 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 *
7654 * parse an attribute in the new SAX2 framework.
7655 *
7656 * Returns the attribute name, and the value in *value, .
7657 */
7658
7659static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007660xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7661 const xmlChar *pref, const xmlChar *elem,
7662 const xmlChar **prefix, xmlChar **value,
7663 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007665 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007666 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007667
7668 *value = NULL;
7669 GROW;
7670 name = xmlParseQName(ctxt, prefix);
7671 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007672 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7673 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007674 return(NULL);
7675 }
7676
7677 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007678 * get the type if needed
7679 */
7680 if (ctxt->attsSpecial != NULL) {
7681 int type;
7682
7683 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7684 pref, elem, *prefix, name);
7685 if (type != 0) normalize = 1;
7686 }
7687
7688 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 * read the value
7690 */
7691 SKIP_BLANKS;
7692 if (RAW == '=') {
7693 NEXT;
7694 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007695 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007696 ctxt->instate = XML_PARSER_CONTENT;
7697 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007698 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007699 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007700 return(NULL);
7701 }
7702
Daniel Veillardd8925572005-06-08 22:34:55 +00007703 if (*prefix == ctxt->str_xml) {
7704 /*
7705 * Check that xml:lang conforms to the specification
7706 * No more registered as an error, just generate a warning now
7707 * since this was deprecated in XML second edition
7708 */
7709 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7710 internal_val = xmlStrndup(val, *len);
7711 if (!xmlCheckLanguageID(internal_val)) {
7712 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7713 "Malformed value for xml:lang : %s\n",
7714 internal_val, NULL);
7715 }
7716 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717
Daniel Veillardd8925572005-06-08 22:34:55 +00007718 /*
7719 * Check that xml:space conforms to the specification
7720 */
7721 if (xmlStrEqual(name, BAD_CAST "space")) {
7722 internal_val = xmlStrndup(val, *len);
7723 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7724 *(ctxt->space) = 0;
7725 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7726 *(ctxt->space) = 1;
7727 else {
7728 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007730 internal_val, NULL);
7731 }
7732 }
7733 if (internal_val) {
7734 xmlFree(internal_val);
7735 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737
7738 *value = val;
7739 return(name);
7740}
7741
7742/**
7743 * xmlParseStartTag2:
7744 * @ctxt: an XML parser context
7745 *
7746 * parse a start of tag either for rule element or
7747 * EmptyElement. In both case we don't parse the tag closing chars.
7748 * This routine is called when running SAX2 parsing
7749 *
7750 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7751 *
7752 * [ WFC: Unique Att Spec ]
7753 * No attribute name may appear more than once in the same start-tag or
7754 * empty-element tag.
7755 *
7756 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7757 *
7758 * [ WFC: Unique Att Spec ]
7759 * No attribute name may appear more than once in the same start-tag or
7760 * empty-element tag.
7761 *
7762 * With namespace:
7763 *
7764 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7765 *
7766 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7767 *
7768 * Returns the element name parsed
7769 */
7770
7771static const xmlChar *
7772xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007773 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 const xmlChar *localname;
7775 const xmlChar *prefix;
7776 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007777 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007778 const xmlChar *nsname;
7779 xmlChar *attvalue;
7780 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007782 int nratts, nbatts, nbdef;
7783 int i, j, nbNs, attval;
7784 const xmlChar *base;
7785 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007786 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787
7788 if (RAW != '<') return(NULL);
7789 NEXT1;
7790
7791 /*
7792 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7793 * point since the attribute values may be stored as pointers to
7794 * the buffer and calling SHRINK would destroy them !
7795 * The Shrinking is only possible once the full set of attribute
7796 * callbacks have been done.
7797 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007798reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007799 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007800 base = ctxt->input->base;
7801 cur = ctxt->input->cur - ctxt->input->base;
7802 nbatts = 0;
7803 nratts = 0;
7804 nbdef = 0;
7805 nbNs = 0;
7806 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007807 /* Forget any namespaces added during an earlier parse of this element. */
7808 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007809
7810 localname = xmlParseQName(ctxt, &prefix);
7811 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007812 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7813 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007814 return(NULL);
7815 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007816 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007817
7818 /*
7819 * Now parse the attributes, it ends up with the ending
7820 *
7821 * (S Attribute)* S?
7822 */
7823 SKIP_BLANKS;
7824 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007825 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007826
7827 while ((RAW != '>') &&
7828 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007829 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007830 const xmlChar *q = CUR_PTR;
7831 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007832 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007833
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007834 attname = xmlParseAttribute2(ctxt, prefix, localname,
7835 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007836 if ((attname != NULL) && (attvalue != NULL)) {
7837 if (len < 0) len = xmlStrlen(attvalue);
7838 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007839 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7840 xmlURIPtr uri;
7841
7842 if (*URL != 0) {
7843 uri = xmlParseURI((const char *) URL);
7844 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007845 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7846 "xmlns: %s not a valid URI\n",
7847 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007848 } else {
7849 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007850 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7851 "xmlns: URI %s is not absolute\n",
7852 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007853 }
7854 xmlFreeURI(uri);
7855 }
7856 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007857 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007858 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007860 for (j = 1;j <= nbNs;j++)
7861 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7862 break;
7863 if (j <= nbNs)
7864 xmlErrAttributeDup(ctxt, NULL, attname);
7865 else
7866 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 if (alloc != 0) xmlFree(attvalue);
7868 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007869 continue;
7870 }
7871 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007872 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7873 xmlURIPtr uri;
7874
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007875 if (attname == ctxt->str_xml) {
7876 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007877 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7878 "xml namespace prefix mapped to wrong URI\n",
7879 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007880 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007881 /*
7882 * Do not keep a namespace definition node
7883 */
7884 if (alloc != 0) xmlFree(attvalue);
7885 SKIP_BLANKS;
7886 continue;
7887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007888 uri = xmlParseURI((const char *) URL);
7889 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007890 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7891 "xmlns:%s: '%s' is not a valid URI\n",
7892 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007893 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007894 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007895 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7896 "xmlns:%s: URI %s is not absolute\n",
7897 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007898 }
7899 xmlFreeURI(uri);
7900 }
7901
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007903 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007904 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007905 for (j = 1;j <= nbNs;j++)
7906 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7907 break;
7908 if (j <= nbNs)
7909 xmlErrAttributeDup(ctxt, aprefix, attname);
7910 else
7911 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007912 if (alloc != 0) xmlFree(attvalue);
7913 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007914 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007915 continue;
7916 }
7917
7918 /*
7919 * Add the pair to atts
7920 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007921 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7922 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007923 if (attvalue[len] == 0)
7924 xmlFree(attvalue);
7925 goto failed;
7926 }
7927 maxatts = ctxt->maxatts;
7928 atts = ctxt->atts;
7929 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007930 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007931 atts[nbatts++] = attname;
7932 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007933 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007934 atts[nbatts++] = attvalue;
7935 attvalue += len;
7936 atts[nbatts++] = attvalue;
7937 /*
7938 * tag if some deallocation is needed
7939 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007940 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007941 } else {
7942 if ((attvalue != NULL) && (attvalue[len] == 0))
7943 xmlFree(attvalue);
7944 }
7945
7946failed:
7947
7948 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007949 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7951 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007952 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7954 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007955 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007956 }
7957 SKIP_BLANKS;
7958 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7959 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007960 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007961 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 break;
7963 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007964 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007965 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 }
7967
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007969 * The attributes defaulting
7970 */
7971 if (ctxt->attsDefault != NULL) {
7972 xmlDefAttrsPtr defaults;
7973
7974 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7975 if (defaults != NULL) {
7976 for (i = 0;i < defaults->nbAttrs;i++) {
7977 attname = defaults->values[4 * i];
7978 aprefix = defaults->values[4 * i + 1];
7979
7980 /*
7981 * special work for namespaces defaulted defs
7982 */
7983 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7984 /*
7985 * check that it's not a defined namespace
7986 */
7987 for (j = 1;j <= nbNs;j++)
7988 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7989 break;
7990 if (j <= nbNs) continue;
7991
7992 nsname = xmlGetNamespace(ctxt, NULL);
7993 if (nsname != defaults->values[4 * i + 2]) {
7994 if (nsPush(ctxt, NULL,
7995 defaults->values[4 * i + 2]) > 0)
7996 nbNs++;
7997 }
7998 } else if (aprefix == ctxt->str_xmlns) {
7999 /*
8000 * check that it's not a defined namespace
8001 */
8002 for (j = 1;j <= nbNs;j++)
8003 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8004 break;
8005 if (j <= nbNs) continue;
8006
8007 nsname = xmlGetNamespace(ctxt, attname);
8008 if (nsname != defaults->values[2]) {
8009 if (nsPush(ctxt, attname,
8010 defaults->values[4 * i + 2]) > 0)
8011 nbNs++;
8012 }
8013 } else {
8014 /*
8015 * check that it's not a defined attribute
8016 */
8017 for (j = 0;j < nbatts;j+=5) {
8018 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8019 break;
8020 }
8021 if (j < nbatts) continue;
8022
8023 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8024 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008025 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008026 }
8027 maxatts = ctxt->maxatts;
8028 atts = ctxt->atts;
8029 }
8030 atts[nbatts++] = attname;
8031 atts[nbatts++] = aprefix;
8032 if (aprefix == NULL)
8033 atts[nbatts++] = NULL;
8034 else
8035 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8036 atts[nbatts++] = defaults->values[4 * i + 2];
8037 atts[nbatts++] = defaults->values[4 * i + 3];
8038 nbdef++;
8039 }
8040 }
8041 }
8042 }
8043
Daniel Veillarde70c8772003-11-25 07:21:18 +00008044 /*
8045 * The attributes checkings
8046 */
8047 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008048 /*
8049 * The default namespace does not apply to attribute names.
8050 */
8051 if (atts[i + 1] != NULL) {
8052 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8053 if (nsname == NULL) {
8054 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8055 "Namespace prefix %s for %s on %s is not defined\n",
8056 atts[i + 1], atts[i], localname);
8057 }
8058 atts[i + 2] = nsname;
8059 } else
8060 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008061 /*
8062 * [ WFC: Unique Att Spec ]
8063 * No attribute name may appear more than once in the same
8064 * start-tag or empty-element tag.
8065 * As extended by the Namespace in XML REC.
8066 */
8067 for (j = 0; j < i;j += 5) {
8068 if (atts[i] == atts[j]) {
8069 if (atts[i+1] == atts[j+1]) {
8070 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8071 break;
8072 }
8073 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8074 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8075 "Namespaced Attribute %s in '%s' redefined\n",
8076 atts[i], nsname, NULL);
8077 break;
8078 }
8079 }
8080 }
8081 }
8082
Daniel Veillarde57ec792003-09-10 10:50:59 +00008083 nsname = xmlGetNamespace(ctxt, prefix);
8084 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008085 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8086 "Namespace prefix %s on %s is not defined\n",
8087 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008088 }
8089 *pref = prefix;
8090 *URI = nsname;
8091
8092 /*
8093 * SAX: Start of Element !
8094 */
8095 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8096 (!ctxt->disableSAX)) {
8097 if (nbNs > 0)
8098 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8099 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8100 nbatts / 5, nbdef, atts);
8101 else
8102 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8103 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8104 }
8105
8106 /*
8107 * Free up attribute allocated strings if needed
8108 */
8109 if (attval != 0) {
8110 for (i = 3,j = 0; j < nratts;i += 5,j++)
8111 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8112 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008113 }
8114
8115 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008116
8117base_changed:
8118 /*
8119 * the attribute strings are valid iif the base didn't changed
8120 */
8121 if (attval != 0) {
8122 for (i = 3,j = 0; j < nratts;i += 5,j++)
8123 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8124 xmlFree((xmlChar *) atts[i]);
8125 }
8126 ctxt->input->cur = ctxt->input->base + cur;
8127 if (ctxt->wellFormed == 1) {
8128 goto reparse;
8129 }
8130 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008131}
8132
8133/**
8134 * xmlParseEndTag2:
8135 * @ctxt: an XML parser context
8136 * @line: line of the start tag
8137 * @nsNr: number of namespaces on the start tag
8138 *
8139 * parse an end of tag
8140 *
8141 * [42] ETag ::= '</' Name S? '>'
8142 *
8143 * With namespace
8144 *
8145 * [NS 9] ETag ::= '</' QName S? '>'
8146 */
8147
8148static void
8149xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008150 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008151 const xmlChar *name;
8152
8153 GROW;
8154 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008155 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008156 return;
8157 }
8158 SKIP(2);
8159
William M. Brack13dfa872004-09-18 04:52:08 +00008160 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008161 if (ctxt->input->cur[tlen] == '>') {
8162 ctxt->input->cur += tlen + 1;
8163 goto done;
8164 }
8165 ctxt->input->cur += tlen;
8166 name = (xmlChar*)1;
8167 } else {
8168 if (prefix == NULL)
8169 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8170 else
8171 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8172 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008173
8174 /*
8175 * We should definitely be at the ending "S? '>'" part
8176 */
8177 GROW;
8178 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008179 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008180 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181 } else
8182 NEXT1;
8183
8184 /*
8185 * [ WFC: Element Type Match ]
8186 * The Name in an element's end-tag must match the element type in the
8187 * start-tag.
8188 *
8189 */
8190 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008191 if (name == NULL) name = BAD_CAST "unparseable";
8192 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008193 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008194 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008195 }
8196
8197 /*
8198 * SAX: End of Tag
8199 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008200done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8202 (!ctxt->disableSAX))
8203 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8204
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 spacePop(ctxt);
8206 if (nsNr != 0)
8207 nsPop(ctxt, nsNr);
8208 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008209}
8210
8211/**
Owen Taylor3473f882001-02-23 17:55:21 +00008212 * xmlParseCDSect:
8213 * @ctxt: an XML parser context
8214 *
8215 * Parse escaped pure raw content.
8216 *
8217 * [18] CDSect ::= CDStart CData CDEnd
8218 *
8219 * [19] CDStart ::= '<![CDATA['
8220 *
8221 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8222 *
8223 * [21] CDEnd ::= ']]>'
8224 */
8225void
8226xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8227 xmlChar *buf = NULL;
8228 int len = 0;
8229 int size = XML_PARSER_BUFFER_SIZE;
8230 int r, rl;
8231 int s, sl;
8232 int cur, l;
8233 int count = 0;
8234
Daniel Veillard8f597c32003-10-06 08:19:27 +00008235 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008236 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008237 SKIP(9);
8238 } else
8239 return;
8240
8241 ctxt->instate = XML_PARSER_CDATA_SECTION;
8242 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008243 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008244 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008245 ctxt->instate = XML_PARSER_CONTENT;
8246 return;
8247 }
8248 NEXTL(rl);
8249 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008250 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008251 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008252 ctxt->instate = XML_PARSER_CONTENT;
8253 return;
8254 }
8255 NEXTL(sl);
8256 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008257 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008258 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008259 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 return;
8261 }
William M. Brack871611b2003-10-18 04:53:14 +00008262 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008263 ((r != ']') || (s != ']') || (cur != '>'))) {
8264 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008265 xmlChar *tmp;
8266
Owen Taylor3473f882001-02-23 17:55:21 +00008267 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008268 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8269 if (tmp == NULL) {
8270 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008271 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008272 return;
8273 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008274 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008275 }
8276 COPY_BUF(rl,buf,len,r);
8277 r = s;
8278 rl = sl;
8279 s = cur;
8280 sl = l;
8281 count++;
8282 if (count > 50) {
8283 GROW;
8284 count = 0;
8285 }
8286 NEXTL(l);
8287 cur = CUR_CHAR(l);
8288 }
8289 buf[len] = 0;
8290 ctxt->instate = XML_PARSER_CONTENT;
8291 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008292 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008293 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008294 xmlFree(buf);
8295 return;
8296 }
8297 NEXTL(l);
8298
8299 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008300 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008301 */
8302 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8303 if (ctxt->sax->cdataBlock != NULL)
8304 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008305 else if (ctxt->sax->characters != NULL)
8306 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008307 }
8308 xmlFree(buf);
8309}
8310
8311/**
8312 * xmlParseContent:
8313 * @ctxt: an XML parser context
8314 *
8315 * Parse a content:
8316 *
8317 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8318 */
8319
8320void
8321xmlParseContent(xmlParserCtxtPtr ctxt) {
8322 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008323 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008324 ((RAW != '<') || (NXT(1) != '/'))) {
8325 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008326 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008327 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008328
8329 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008330 * First case : a Processing Instruction.
8331 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008332 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008333 xmlParsePI(ctxt);
8334 }
8335
8336 /*
8337 * Second case : a CDSection
8338 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008339 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008340 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008341 xmlParseCDSect(ctxt);
8342 }
8343
8344 /*
8345 * Third case : a comment
8346 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008347 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008348 (NXT(2) == '-') && (NXT(3) == '-')) {
8349 xmlParseComment(ctxt);
8350 ctxt->instate = XML_PARSER_CONTENT;
8351 }
8352
8353 /*
8354 * Fourth case : a sub-element.
8355 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008356 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008357 xmlParseElement(ctxt);
8358 }
8359
8360 /*
8361 * Fifth case : a reference. If if has not been resolved,
8362 * parsing returns it's Name, create the node
8363 */
8364
Daniel Veillard21a0f912001-02-25 19:54:14 +00008365 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008366 xmlParseReference(ctxt);
8367 }
8368
8369 /*
8370 * Last case, text. Note that References are handled directly.
8371 */
8372 else {
8373 xmlParseCharData(ctxt, 0);
8374 }
8375
8376 GROW;
8377 /*
8378 * Pop-up of finished entities.
8379 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008380 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008381 xmlPopInput(ctxt);
8382 SHRINK;
8383
Daniel Veillardfdc91562002-07-01 21:52:03 +00008384 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008385 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8386 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008387 ctxt->instate = XML_PARSER_EOF;
8388 break;
8389 }
8390 }
8391}
8392
8393/**
8394 * xmlParseElement:
8395 * @ctxt: an XML parser context
8396 *
8397 * parse an XML element, this is highly recursive
8398 *
8399 * [39] element ::= EmptyElemTag | STag content ETag
8400 *
8401 * [ WFC: Element Type Match ]
8402 * The Name in an element's end-tag must match the element type in the
8403 * start-tag.
8404 *
Owen Taylor3473f882001-02-23 17:55:21 +00008405 */
8406
8407void
8408xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008409 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008410 const xmlChar *prefix;
8411 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008412 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008413 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008414 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008415 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008416
8417 /* Capture start position */
8418 if (ctxt->record_info) {
8419 node_info.begin_pos = ctxt->input->consumed +
8420 (CUR_PTR - ctxt->input->base);
8421 node_info.begin_line = ctxt->input->line;
8422 }
8423
8424 if (ctxt->spaceNr == 0)
8425 spacePush(ctxt, -1);
8426 else
8427 spacePush(ctxt, *ctxt->space);
8428
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008429 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008430#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008431 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008432#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008433 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008434#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008435 else
8436 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008437#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008438 if (name == NULL) {
8439 spacePop(ctxt);
8440 return;
8441 }
8442 namePush(ctxt, name);
8443 ret = ctxt->node;
8444
Daniel Veillard4432df22003-09-28 18:58:27 +00008445#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008446 /*
8447 * [ VC: Root Element Type ]
8448 * The Name in the document type declaration must match the element
8449 * type of the root element.
8450 */
8451 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8452 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8453 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008454#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008455
8456 /*
8457 * Check for an Empty Element.
8458 */
8459 if ((RAW == '/') && (NXT(1) == '>')) {
8460 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008461 if (ctxt->sax2) {
8462 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8463 (!ctxt->disableSAX))
8464 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008465#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008466 } else {
8467 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8468 (!ctxt->disableSAX))
8469 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008470#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008471 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008472 namePop(ctxt);
8473 spacePop(ctxt);
8474 if (nsNr != ctxt->nsNr)
8475 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008476 if ( ret != NULL && ctxt->record_info ) {
8477 node_info.end_pos = ctxt->input->consumed +
8478 (CUR_PTR - ctxt->input->base);
8479 node_info.end_line = ctxt->input->line;
8480 node_info.node = ret;
8481 xmlParserAddNodeInfo(ctxt, &node_info);
8482 }
8483 return;
8484 }
8485 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008486 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008487 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008488 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8489 "Couldn't find end of Start Tag %s line %d\n",
8490 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008491
8492 /*
8493 * end of parsing of this node.
8494 */
8495 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008496 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008497 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008498 if (nsNr != ctxt->nsNr)
8499 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008500
8501 /*
8502 * Capture end position and add node
8503 */
8504 if ( ret != NULL && ctxt->record_info ) {
8505 node_info.end_pos = ctxt->input->consumed +
8506 (CUR_PTR - ctxt->input->base);
8507 node_info.end_line = ctxt->input->line;
8508 node_info.node = ret;
8509 xmlParserAddNodeInfo(ctxt, &node_info);
8510 }
8511 return;
8512 }
8513
8514 /*
8515 * Parse the content of the element:
8516 */
8517 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008518 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008519 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008520 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008521 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008522
8523 /*
8524 * end of parsing of this node.
8525 */
8526 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008527 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008528 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529 if (nsNr != ctxt->nsNr)
8530 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008531 return;
8532 }
8533
8534 /*
8535 * parse the end of tag: '</' should be here.
8536 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008537 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008538 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008539 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008540 }
8541#ifdef LIBXML_SAX1_ENABLED
8542 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008544#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008545
8546 /*
8547 * Capture end position and add node
8548 */
8549 if ( ret != NULL && ctxt->record_info ) {
8550 node_info.end_pos = ctxt->input->consumed +
8551 (CUR_PTR - ctxt->input->base);
8552 node_info.end_line = ctxt->input->line;
8553 node_info.node = ret;
8554 xmlParserAddNodeInfo(ctxt, &node_info);
8555 }
8556}
8557
8558/**
8559 * xmlParseVersionNum:
8560 * @ctxt: an XML parser context
8561 *
8562 * parse the XML version value.
8563 *
8564 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8565 *
8566 * Returns the string giving the XML version number, or NULL
8567 */
8568xmlChar *
8569xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8570 xmlChar *buf = NULL;
8571 int len = 0;
8572 int size = 10;
8573 xmlChar cur;
8574
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008575 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008576 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008577 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008578 return(NULL);
8579 }
8580 cur = CUR;
8581 while (((cur >= 'a') && (cur <= 'z')) ||
8582 ((cur >= 'A') && (cur <= 'Z')) ||
8583 ((cur >= '0') && (cur <= '9')) ||
8584 (cur == '_') || (cur == '.') ||
8585 (cur == ':') || (cur == '-')) {
8586 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008587 xmlChar *tmp;
8588
Owen Taylor3473f882001-02-23 17:55:21 +00008589 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008590 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8591 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008592 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008593 return(NULL);
8594 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008595 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008596 }
8597 buf[len++] = cur;
8598 NEXT;
8599 cur=CUR;
8600 }
8601 buf[len] = 0;
8602 return(buf);
8603}
8604
8605/**
8606 * xmlParseVersionInfo:
8607 * @ctxt: an XML parser context
8608 *
8609 * parse the XML version.
8610 *
8611 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8612 *
8613 * [25] Eq ::= S? '=' S?
8614 *
8615 * Returns the version string, e.g. "1.0"
8616 */
8617
8618xmlChar *
8619xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8620 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008621
Daniel Veillarda07050d2003-10-19 14:46:32 +00008622 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008623 SKIP(7);
8624 SKIP_BLANKS;
8625 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008626 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 return(NULL);
8628 }
8629 NEXT;
8630 SKIP_BLANKS;
8631 if (RAW == '"') {
8632 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008633 version = xmlParseVersionNum(ctxt);
8634 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008635 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008636 } else
8637 NEXT;
8638 } else if (RAW == '\''){
8639 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008640 version = xmlParseVersionNum(ctxt);
8641 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008642 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008643 } else
8644 NEXT;
8645 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008646 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008647 }
8648 }
8649 return(version);
8650}
8651
8652/**
8653 * xmlParseEncName:
8654 * @ctxt: an XML parser context
8655 *
8656 * parse the XML encoding name
8657 *
8658 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8659 *
8660 * Returns the encoding name value or NULL
8661 */
8662xmlChar *
8663xmlParseEncName(xmlParserCtxtPtr ctxt) {
8664 xmlChar *buf = NULL;
8665 int len = 0;
8666 int size = 10;
8667 xmlChar cur;
8668
8669 cur = CUR;
8670 if (((cur >= 'a') && (cur <= 'z')) ||
8671 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008672 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008673 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008674 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008675 return(NULL);
8676 }
8677
8678 buf[len++] = cur;
8679 NEXT;
8680 cur = CUR;
8681 while (((cur >= 'a') && (cur <= 'z')) ||
8682 ((cur >= 'A') && (cur <= 'Z')) ||
8683 ((cur >= '0') && (cur <= '9')) ||
8684 (cur == '.') || (cur == '_') ||
8685 (cur == '-')) {
8686 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008687 xmlChar *tmp;
8688
Owen Taylor3473f882001-02-23 17:55:21 +00008689 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008690 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8691 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008692 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008693 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008694 return(NULL);
8695 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008696 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008697 }
8698 buf[len++] = cur;
8699 NEXT;
8700 cur = CUR;
8701 if (cur == 0) {
8702 SHRINK;
8703 GROW;
8704 cur = CUR;
8705 }
8706 }
8707 buf[len] = 0;
8708 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008709 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008710 }
8711 return(buf);
8712}
8713
8714/**
8715 * xmlParseEncodingDecl:
8716 * @ctxt: an XML parser context
8717 *
8718 * parse the XML encoding declaration
8719 *
8720 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8721 *
8722 * this setups the conversion filters.
8723 *
8724 * Returns the encoding value or NULL
8725 */
8726
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008727const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008728xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8729 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008730
8731 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008732 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008733 SKIP(8);
8734 SKIP_BLANKS;
8735 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008736 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008737 return(NULL);
8738 }
8739 NEXT;
8740 SKIP_BLANKS;
8741 if (RAW == '"') {
8742 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008743 encoding = xmlParseEncName(ctxt);
8744 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008745 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008746 } else
8747 NEXT;
8748 } else if (RAW == '\''){
8749 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008750 encoding = xmlParseEncName(ctxt);
8751 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008752 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008753 } else
8754 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008755 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008756 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008757 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008758 /*
8759 * UTF-16 encoding stwich has already taken place at this stage,
8760 * more over the little-endian/big-endian selection is already done
8761 */
8762 if ((encoding != NULL) &&
8763 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8764 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008765 if (ctxt->encoding != NULL)
8766 xmlFree((xmlChar *) ctxt->encoding);
8767 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008768 }
8769 /*
8770 * UTF-8 encoding is handled natively
8771 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008772 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008773 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8774 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008775 if (ctxt->encoding != NULL)
8776 xmlFree((xmlChar *) ctxt->encoding);
8777 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008778 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008779 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008780 xmlCharEncodingHandlerPtr handler;
8781
8782 if (ctxt->input->encoding != NULL)
8783 xmlFree((xmlChar *) ctxt->input->encoding);
8784 ctxt->input->encoding = encoding;
8785
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008786 handler = xmlFindCharEncodingHandler((const char *) encoding);
8787 if (handler != NULL) {
8788 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008789 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008790 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008791 "Unsupported encoding %s\n", encoding);
8792 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008793 }
8794 }
8795 }
8796 return(encoding);
8797}
8798
8799/**
8800 * xmlParseSDDecl:
8801 * @ctxt: an XML parser context
8802 *
8803 * parse the XML standalone declaration
8804 *
8805 * [32] SDDecl ::= S 'standalone' Eq
8806 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8807 *
8808 * [ VC: Standalone Document Declaration ]
8809 * TODO The standalone document declaration must have the value "no"
8810 * if any external markup declarations contain declarations of:
8811 * - attributes with default values, if elements to which these
8812 * attributes apply appear in the document without specifications
8813 * of values for these attributes, or
8814 * - entities (other than amp, lt, gt, apos, quot), if references
8815 * to those entities appear in the document, or
8816 * - attributes with values subject to normalization, where the
8817 * attribute appears in the document with a value which will change
8818 * as a result of normalization, or
8819 * - element types with element content, if white space occurs directly
8820 * within any instance of those types.
8821 *
8822 * Returns 1 if standalone, 0 otherwise
8823 */
8824
8825int
8826xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8827 int standalone = -1;
8828
8829 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008830 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008831 SKIP(10);
8832 SKIP_BLANKS;
8833 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008834 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008835 return(standalone);
8836 }
8837 NEXT;
8838 SKIP_BLANKS;
8839 if (RAW == '\''){
8840 NEXT;
8841 if ((RAW == 'n') && (NXT(1) == 'o')) {
8842 standalone = 0;
8843 SKIP(2);
8844 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8845 (NXT(2) == 's')) {
8846 standalone = 1;
8847 SKIP(3);
8848 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008849 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008850 }
8851 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008852 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008853 } else
8854 NEXT;
8855 } else if (RAW == '"'){
8856 NEXT;
8857 if ((RAW == 'n') && (NXT(1) == 'o')) {
8858 standalone = 0;
8859 SKIP(2);
8860 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8861 (NXT(2) == 's')) {
8862 standalone = 1;
8863 SKIP(3);
8864 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008865 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008866 }
8867 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008869 } else
8870 NEXT;
8871 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008872 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008873 }
8874 }
8875 return(standalone);
8876}
8877
8878/**
8879 * xmlParseXMLDecl:
8880 * @ctxt: an XML parser context
8881 *
8882 * parse an XML declaration header
8883 *
8884 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8885 */
8886
8887void
8888xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8889 xmlChar *version;
8890
8891 /*
8892 * We know that '<?xml' is here.
8893 */
8894 SKIP(5);
8895
William M. Brack76e95df2003-10-18 16:20:14 +00008896 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008897 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8898 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008899 }
8900 SKIP_BLANKS;
8901
8902 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008903 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008904 */
8905 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008906 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008907 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008908 } else {
8909 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8910 /*
8911 * TODO: Blueberry should be detected here
8912 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008913 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8914 "Unsupported version '%s'\n",
8915 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008916 }
8917 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008918 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008919 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008920 }
Owen Taylor3473f882001-02-23 17:55:21 +00008921
8922 /*
8923 * We may have the encoding declaration
8924 */
William M. Brack76e95df2003-10-18 16:20:14 +00008925 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008926 if ((RAW == '?') && (NXT(1) == '>')) {
8927 SKIP(2);
8928 return;
8929 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008931 }
8932 xmlParseEncodingDecl(ctxt);
8933 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8934 /*
8935 * The XML REC instructs us to stop parsing right here
8936 */
8937 return;
8938 }
8939
8940 /*
8941 * We may have the standalone status.
8942 */
William M. Brack76e95df2003-10-18 16:20:14 +00008943 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008944 if ((RAW == '?') && (NXT(1) == '>')) {
8945 SKIP(2);
8946 return;
8947 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008948 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008949 }
8950 SKIP_BLANKS;
8951 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8952
8953 SKIP_BLANKS;
8954 if ((RAW == '?') && (NXT(1) == '>')) {
8955 SKIP(2);
8956 } else if (RAW == '>') {
8957 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008958 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008959 NEXT;
8960 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008961 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008962 MOVETO_ENDTAG(CUR_PTR);
8963 NEXT;
8964 }
8965}
8966
8967/**
8968 * xmlParseMisc:
8969 * @ctxt: an XML parser context
8970 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008971 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008972 *
8973 * [27] Misc ::= Comment | PI | S
8974 */
8975
8976void
8977xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008978 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008979 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008980 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008981 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008982 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008983 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008984 NEXT;
8985 } else
8986 xmlParseComment(ctxt);
8987 }
8988}
8989
8990/**
8991 * xmlParseDocument:
8992 * @ctxt: an XML parser context
8993 *
8994 * parse an XML document (and build a tree if using the standard SAX
8995 * interface).
8996 *
8997 * [1] document ::= prolog element Misc*
8998 *
8999 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9000 *
9001 * Returns 0, -1 in case of error. the parser context is augmented
9002 * as a result of the parsing.
9003 */
9004
9005int
9006xmlParseDocument(xmlParserCtxtPtr ctxt) {
9007 xmlChar start[4];
9008 xmlCharEncoding enc;
9009
9010 xmlInitParser();
9011
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009012 if ((ctxt == NULL) || (ctxt->input == NULL))
9013 return(-1);
9014
Owen Taylor3473f882001-02-23 17:55:21 +00009015 GROW;
9016
9017 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009018 * SAX: detecting the level.
9019 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009020 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009021
9022 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009023 * SAX: beginning of the document processing.
9024 */
9025 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9026 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9027
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009028 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9029 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009030 /*
9031 * Get the 4 first bytes and decode the charset
9032 * if enc != XML_CHAR_ENCODING_NONE
9033 * plug some encoding conversion routines.
9034 */
9035 start[0] = RAW;
9036 start[1] = NXT(1);
9037 start[2] = NXT(2);
9038 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009039 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009040 if (enc != XML_CHAR_ENCODING_NONE) {
9041 xmlSwitchEncoding(ctxt, enc);
9042 }
Owen Taylor3473f882001-02-23 17:55:21 +00009043 }
9044
9045
9046 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009047 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009048 }
9049
9050 /*
9051 * Check for the XMLDecl in the Prolog.
9052 */
9053 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009054 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009055
9056 /*
9057 * Note that we will switch encoding on the fly.
9058 */
9059 xmlParseXMLDecl(ctxt);
9060 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9061 /*
9062 * The XML REC instructs us to stop parsing right here
9063 */
9064 return(-1);
9065 }
9066 ctxt->standalone = ctxt->input->standalone;
9067 SKIP_BLANKS;
9068 } else {
9069 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9070 }
9071 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9072 ctxt->sax->startDocument(ctxt->userData);
9073
9074 /*
9075 * The Misc part of the Prolog
9076 */
9077 GROW;
9078 xmlParseMisc(ctxt);
9079
9080 /*
9081 * Then possibly doc type declaration(s) and more Misc
9082 * (doctypedecl Misc*)?
9083 */
9084 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009085 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009086
9087 ctxt->inSubset = 1;
9088 xmlParseDocTypeDecl(ctxt);
9089 if (RAW == '[') {
9090 ctxt->instate = XML_PARSER_DTD;
9091 xmlParseInternalSubset(ctxt);
9092 }
9093
9094 /*
9095 * Create and update the external subset.
9096 */
9097 ctxt->inSubset = 2;
9098 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9099 (!ctxt->disableSAX))
9100 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9101 ctxt->extSubSystem, ctxt->extSubURI);
9102 ctxt->inSubset = 0;
9103
9104
9105 ctxt->instate = XML_PARSER_PROLOG;
9106 xmlParseMisc(ctxt);
9107 }
9108
9109 /*
9110 * Time to start parsing the tree itself
9111 */
9112 GROW;
9113 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009114 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9115 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009116 } else {
9117 ctxt->instate = XML_PARSER_CONTENT;
9118 xmlParseElement(ctxt);
9119 ctxt->instate = XML_PARSER_EPILOG;
9120
9121
9122 /*
9123 * The Misc part at the end
9124 */
9125 xmlParseMisc(ctxt);
9126
Daniel Veillard561b7f82002-03-20 21:55:57 +00009127 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009128 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009129 }
9130 ctxt->instate = XML_PARSER_EOF;
9131 }
9132
9133 /*
9134 * SAX: end of the document processing.
9135 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009136 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009137 ctxt->sax->endDocument(ctxt->userData);
9138
Daniel Veillard5997aca2002-03-18 18:36:20 +00009139 /*
9140 * Remove locally kept entity definitions if the tree was not built
9141 */
9142 if ((ctxt->myDoc != NULL) &&
9143 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9144 xmlFreeDoc(ctxt->myDoc);
9145 ctxt->myDoc = NULL;
9146 }
9147
Daniel Veillardc7612992002-02-17 22:47:37 +00009148 if (! ctxt->wellFormed) {
9149 ctxt->valid = 0;
9150 return(-1);
9151 }
Owen Taylor3473f882001-02-23 17:55:21 +00009152 return(0);
9153}
9154
9155/**
9156 * xmlParseExtParsedEnt:
9157 * @ctxt: an XML parser context
9158 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009159 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009160 * An external general parsed entity is well-formed if it matches the
9161 * production labeled extParsedEnt.
9162 *
9163 * [78] extParsedEnt ::= TextDecl? content
9164 *
9165 * Returns 0, -1 in case of error. the parser context is augmented
9166 * as a result of the parsing.
9167 */
9168
9169int
9170xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9171 xmlChar start[4];
9172 xmlCharEncoding enc;
9173
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009174 if ((ctxt == NULL) || (ctxt->input == NULL))
9175 return(-1);
9176
Owen Taylor3473f882001-02-23 17:55:21 +00009177 xmlDefaultSAXHandlerInit();
9178
Daniel Veillard309f81d2003-09-23 09:02:53 +00009179 xmlDetectSAX2(ctxt);
9180
Owen Taylor3473f882001-02-23 17:55:21 +00009181 GROW;
9182
9183 /*
9184 * SAX: beginning of the document processing.
9185 */
9186 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9187 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9188
9189 /*
9190 * Get the 4 first bytes and decode the charset
9191 * if enc != XML_CHAR_ENCODING_NONE
9192 * plug some encoding conversion routines.
9193 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009194 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9195 start[0] = RAW;
9196 start[1] = NXT(1);
9197 start[2] = NXT(2);
9198 start[3] = NXT(3);
9199 enc = xmlDetectCharEncoding(start, 4);
9200 if (enc != XML_CHAR_ENCODING_NONE) {
9201 xmlSwitchEncoding(ctxt, enc);
9202 }
Owen Taylor3473f882001-02-23 17:55:21 +00009203 }
9204
9205
9206 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009207 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009208 }
9209
9210 /*
9211 * Check for the XMLDecl in the Prolog.
9212 */
9213 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009214 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009215
9216 /*
9217 * Note that we will switch encoding on the fly.
9218 */
9219 xmlParseXMLDecl(ctxt);
9220 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9221 /*
9222 * The XML REC instructs us to stop parsing right here
9223 */
9224 return(-1);
9225 }
9226 SKIP_BLANKS;
9227 } else {
9228 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9229 }
9230 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9231 ctxt->sax->startDocument(ctxt->userData);
9232
9233 /*
9234 * Doing validity checking on chunk doesn't make sense
9235 */
9236 ctxt->instate = XML_PARSER_CONTENT;
9237 ctxt->validate = 0;
9238 ctxt->loadsubset = 0;
9239 ctxt->depth = 0;
9240
9241 xmlParseContent(ctxt);
9242
9243 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009244 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009245 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009246 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009247 }
9248
9249 /*
9250 * SAX: end of the document processing.
9251 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009252 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009253 ctxt->sax->endDocument(ctxt->userData);
9254
9255 if (! ctxt->wellFormed) return(-1);
9256 return(0);
9257}
9258
Daniel Veillard73b013f2003-09-30 12:36:01 +00009259#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009260/************************************************************************
9261 * *
9262 * Progressive parsing interfaces *
9263 * *
9264 ************************************************************************/
9265
9266/**
9267 * xmlParseLookupSequence:
9268 * @ctxt: an XML parser context
9269 * @first: the first char to lookup
9270 * @next: the next char to lookup or zero
9271 * @third: the next char to lookup or zero
9272 *
9273 * Try to find if a sequence (first, next, third) or just (first next) or
9274 * (first) is available in the input stream.
9275 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9276 * to avoid rescanning sequences of bytes, it DOES change the state of the
9277 * parser, do not use liberally.
9278 *
9279 * Returns the index to the current parsing point if the full sequence
9280 * is available, -1 otherwise.
9281 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009282static int
Owen Taylor3473f882001-02-23 17:55:21 +00009283xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9284 xmlChar next, xmlChar third) {
9285 int base, len;
9286 xmlParserInputPtr in;
9287 const xmlChar *buf;
9288
9289 in = ctxt->input;
9290 if (in == NULL) return(-1);
9291 base = in->cur - in->base;
9292 if (base < 0) return(-1);
9293 if (ctxt->checkIndex > base)
9294 base = ctxt->checkIndex;
9295 if (in->buf == NULL) {
9296 buf = in->base;
9297 len = in->length;
9298 } else {
9299 buf = in->buf->buffer->content;
9300 len = in->buf->buffer->use;
9301 }
9302 /* take into account the sequence length */
9303 if (third) len -= 2;
9304 else if (next) len --;
9305 for (;base < len;base++) {
9306 if (buf[base] == first) {
9307 if (third != 0) {
9308 if ((buf[base + 1] != next) ||
9309 (buf[base + 2] != third)) continue;
9310 } else if (next != 0) {
9311 if (buf[base + 1] != next) continue;
9312 }
9313 ctxt->checkIndex = 0;
9314#ifdef DEBUG_PUSH
9315 if (next == 0)
9316 xmlGenericError(xmlGenericErrorContext,
9317 "PP: lookup '%c' found at %d\n",
9318 first, base);
9319 else if (third == 0)
9320 xmlGenericError(xmlGenericErrorContext,
9321 "PP: lookup '%c%c' found at %d\n",
9322 first, next, base);
9323 else
9324 xmlGenericError(xmlGenericErrorContext,
9325 "PP: lookup '%c%c%c' found at %d\n",
9326 first, next, third, base);
9327#endif
9328 return(base - (in->cur - in->base));
9329 }
9330 }
9331 ctxt->checkIndex = base;
9332#ifdef DEBUG_PUSH
9333 if (next == 0)
9334 xmlGenericError(xmlGenericErrorContext,
9335 "PP: lookup '%c' failed\n", first);
9336 else if (third == 0)
9337 xmlGenericError(xmlGenericErrorContext,
9338 "PP: lookup '%c%c' failed\n", first, next);
9339 else
9340 xmlGenericError(xmlGenericErrorContext,
9341 "PP: lookup '%c%c%c' failed\n", first, next, third);
9342#endif
9343 return(-1);
9344}
9345
9346/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009347 * xmlParseGetLasts:
9348 * @ctxt: an XML parser context
9349 * @lastlt: pointer to store the last '<' from the input
9350 * @lastgt: pointer to store the last '>' from the input
9351 *
9352 * Lookup the last < and > in the current chunk
9353 */
9354static void
9355xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9356 const xmlChar **lastgt) {
9357 const xmlChar *tmp;
9358
9359 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9360 xmlGenericError(xmlGenericErrorContext,
9361 "Internal error: xmlParseGetLasts\n");
9362 return;
9363 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009364 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009365 tmp = ctxt->input->end;
9366 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009367 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009368 if (tmp < ctxt->input->base) {
9369 *lastlt = NULL;
9370 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009371 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009372 *lastlt = tmp;
9373 tmp++;
9374 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9375 if (*tmp == '\'') {
9376 tmp++;
9377 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9378 if (tmp < ctxt->input->end) tmp++;
9379 } else if (*tmp == '"') {
9380 tmp++;
9381 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9382 if (tmp < ctxt->input->end) tmp++;
9383 } else
9384 tmp++;
9385 }
9386 if (tmp < ctxt->input->end)
9387 *lastgt = tmp;
9388 else {
9389 tmp = *lastlt;
9390 tmp--;
9391 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9392 if (tmp >= ctxt->input->base)
9393 *lastgt = tmp;
9394 else
9395 *lastgt = NULL;
9396 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009397 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009398 } else {
9399 *lastlt = NULL;
9400 *lastgt = NULL;
9401 }
9402}
9403/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009404 * xmlCheckCdataPush:
9405 * @cur: pointer to the bock of characters
9406 * @len: length of the block in bytes
9407 *
9408 * Check that the block of characters is okay as SCdata content [20]
9409 *
9410 * Returns the number of bytes to pass if okay, a negative index where an
9411 * UTF-8 error occured otherwise
9412 */
9413static int
9414xmlCheckCdataPush(const xmlChar *utf, int len) {
9415 int ix;
9416 unsigned char c;
9417 int codepoint;
9418
9419 if ((utf == NULL) || (len <= 0))
9420 return(0);
9421
9422 for (ix = 0; ix < len;) { /* string is 0-terminated */
9423 c = utf[ix];
9424 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9425 if (c >= 0x20)
9426 ix++;
9427 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9428 ix++;
9429 else
9430 return(-ix);
9431 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9432 if (ix + 2 > len) return(ix);
9433 if ((utf[ix+1] & 0xc0 ) != 0x80)
9434 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009435 codepoint = (utf[ix] & 0x1f) << 6;
9436 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009437 if (!xmlIsCharQ(codepoint))
9438 return(-ix);
9439 ix += 2;
9440 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9441 if (ix + 3 > len) return(ix);
9442 if (((utf[ix+1] & 0xc0) != 0x80) ||
9443 ((utf[ix+2] & 0xc0) != 0x80))
9444 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009445 codepoint = (utf[ix] & 0xf) << 12;
9446 codepoint |= (utf[ix+1] & 0x3f) << 6;
9447 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009448 if (!xmlIsCharQ(codepoint))
9449 return(-ix);
9450 ix += 3;
9451 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9452 if (ix + 4 > len) return(ix);
9453 if (((utf[ix+1] & 0xc0) != 0x80) ||
9454 ((utf[ix+2] & 0xc0) != 0x80) ||
9455 ((utf[ix+3] & 0xc0) != 0x80))
9456 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009457 codepoint = (utf[ix] & 0x7) << 18;
9458 codepoint |= (utf[ix+1] & 0x3f) << 12;
9459 codepoint |= (utf[ix+2] & 0x3f) << 6;
9460 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009461 if (!xmlIsCharQ(codepoint))
9462 return(-ix);
9463 ix += 4;
9464 } else /* unknown encoding */
9465 return(-ix);
9466 }
9467 return(ix);
9468}
9469
9470/**
Owen Taylor3473f882001-02-23 17:55:21 +00009471 * xmlParseTryOrFinish:
9472 * @ctxt: an XML parser context
9473 * @terminate: last chunk indicator
9474 *
9475 * Try to progress on parsing
9476 *
9477 * Returns zero if no parsing was possible
9478 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009479static int
Owen Taylor3473f882001-02-23 17:55:21 +00009480xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9481 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009482 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009483 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009485
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009486 if (ctxt->input == NULL)
9487 return(0);
9488
Owen Taylor3473f882001-02-23 17:55:21 +00009489#ifdef DEBUG_PUSH
9490 switch (ctxt->instate) {
9491 case XML_PARSER_EOF:
9492 xmlGenericError(xmlGenericErrorContext,
9493 "PP: try EOF\n"); break;
9494 case XML_PARSER_START:
9495 xmlGenericError(xmlGenericErrorContext,
9496 "PP: try START\n"); break;
9497 case XML_PARSER_MISC:
9498 xmlGenericError(xmlGenericErrorContext,
9499 "PP: try MISC\n");break;
9500 case XML_PARSER_COMMENT:
9501 xmlGenericError(xmlGenericErrorContext,
9502 "PP: try COMMENT\n");break;
9503 case XML_PARSER_PROLOG:
9504 xmlGenericError(xmlGenericErrorContext,
9505 "PP: try PROLOG\n");break;
9506 case XML_PARSER_START_TAG:
9507 xmlGenericError(xmlGenericErrorContext,
9508 "PP: try START_TAG\n");break;
9509 case XML_PARSER_CONTENT:
9510 xmlGenericError(xmlGenericErrorContext,
9511 "PP: try CONTENT\n");break;
9512 case XML_PARSER_CDATA_SECTION:
9513 xmlGenericError(xmlGenericErrorContext,
9514 "PP: try CDATA_SECTION\n");break;
9515 case XML_PARSER_END_TAG:
9516 xmlGenericError(xmlGenericErrorContext,
9517 "PP: try END_TAG\n");break;
9518 case XML_PARSER_ENTITY_DECL:
9519 xmlGenericError(xmlGenericErrorContext,
9520 "PP: try ENTITY_DECL\n");break;
9521 case XML_PARSER_ENTITY_VALUE:
9522 xmlGenericError(xmlGenericErrorContext,
9523 "PP: try ENTITY_VALUE\n");break;
9524 case XML_PARSER_ATTRIBUTE_VALUE:
9525 xmlGenericError(xmlGenericErrorContext,
9526 "PP: try ATTRIBUTE_VALUE\n");break;
9527 case XML_PARSER_DTD:
9528 xmlGenericError(xmlGenericErrorContext,
9529 "PP: try DTD\n");break;
9530 case XML_PARSER_EPILOG:
9531 xmlGenericError(xmlGenericErrorContext,
9532 "PP: try EPILOG\n");break;
9533 case XML_PARSER_PI:
9534 xmlGenericError(xmlGenericErrorContext,
9535 "PP: try PI\n");break;
9536 case XML_PARSER_IGNORE:
9537 xmlGenericError(xmlGenericErrorContext,
9538 "PP: try IGNORE\n");break;
9539 }
9540#endif
9541
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009542 if ((ctxt->input != NULL) &&
9543 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009544 xmlSHRINK(ctxt);
9545 ctxt->checkIndex = 0;
9546 }
9547 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009548
Daniel Veillarda880b122003-04-21 21:36:41 +00009549 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009550 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009551 return(0);
9552
9553
Owen Taylor3473f882001-02-23 17:55:21 +00009554 /*
9555 * Pop-up of finished entities.
9556 */
9557 while ((RAW == 0) && (ctxt->inputNr > 1))
9558 xmlPopInput(ctxt);
9559
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009560 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009561 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009562 avail = ctxt->input->length -
9563 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009564 else {
9565 /*
9566 * If we are operating on converted input, try to flush
9567 * remainng chars to avoid them stalling in the non-converted
9568 * buffer.
9569 */
9570 if ((ctxt->input->buf->raw != NULL) &&
9571 (ctxt->input->buf->raw->use > 0)) {
9572 int base = ctxt->input->base -
9573 ctxt->input->buf->buffer->content;
9574 int current = ctxt->input->cur - ctxt->input->base;
9575
9576 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9577 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9578 ctxt->input->cur = ctxt->input->base + current;
9579 ctxt->input->end =
9580 &ctxt->input->buf->buffer->content[
9581 ctxt->input->buf->buffer->use];
9582 }
9583 avail = ctxt->input->buf->buffer->use -
9584 (ctxt->input->cur - ctxt->input->base);
9585 }
Owen Taylor3473f882001-02-23 17:55:21 +00009586 if (avail < 1)
9587 goto done;
9588 switch (ctxt->instate) {
9589 case XML_PARSER_EOF:
9590 /*
9591 * Document parsing is done !
9592 */
9593 goto done;
9594 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009595 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9596 xmlChar start[4];
9597 xmlCharEncoding enc;
9598
9599 /*
9600 * Very first chars read from the document flow.
9601 */
9602 if (avail < 4)
9603 goto done;
9604
9605 /*
9606 * Get the 4 first bytes and decode the charset
9607 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009608 * plug some encoding conversion routines,
9609 * else xmlSwitchEncoding will set to (default)
9610 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009611 */
9612 start[0] = RAW;
9613 start[1] = NXT(1);
9614 start[2] = NXT(2);
9615 start[3] = NXT(3);
9616 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009617 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009618 break;
9619 }
Owen Taylor3473f882001-02-23 17:55:21 +00009620
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009621 if (avail < 2)
9622 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009623 cur = ctxt->input->cur[0];
9624 next = ctxt->input->cur[1];
9625 if (cur == 0) {
9626 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9627 ctxt->sax->setDocumentLocator(ctxt->userData,
9628 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009629 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009630 ctxt->instate = XML_PARSER_EOF;
9631#ifdef DEBUG_PUSH
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: entering EOF\n");
9634#endif
9635 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9636 ctxt->sax->endDocument(ctxt->userData);
9637 goto done;
9638 }
9639 if ((cur == '<') && (next == '?')) {
9640 /* PI or XML decl */
9641 if (avail < 5) return(ret);
9642 if ((!terminate) &&
9643 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9644 return(ret);
9645 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9646 ctxt->sax->setDocumentLocator(ctxt->userData,
9647 &xmlDefaultSAXLocator);
9648 if ((ctxt->input->cur[2] == 'x') &&
9649 (ctxt->input->cur[3] == 'm') &&
9650 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009651 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009652 ret += 5;
9653#ifdef DEBUG_PUSH
9654 xmlGenericError(xmlGenericErrorContext,
9655 "PP: Parsing XML Decl\n");
9656#endif
9657 xmlParseXMLDecl(ctxt);
9658 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9659 /*
9660 * The XML REC instructs us to stop parsing right
9661 * here
9662 */
9663 ctxt->instate = XML_PARSER_EOF;
9664 return(0);
9665 }
9666 ctxt->standalone = ctxt->input->standalone;
9667 if ((ctxt->encoding == NULL) &&
9668 (ctxt->input->encoding != NULL))
9669 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9670 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9671 (!ctxt->disableSAX))
9672 ctxt->sax->startDocument(ctxt->userData);
9673 ctxt->instate = XML_PARSER_MISC;
9674#ifdef DEBUG_PUSH
9675 xmlGenericError(xmlGenericErrorContext,
9676 "PP: entering MISC\n");
9677#endif
9678 } else {
9679 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9680 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9681 (!ctxt->disableSAX))
9682 ctxt->sax->startDocument(ctxt->userData);
9683 ctxt->instate = XML_PARSER_MISC;
9684#ifdef DEBUG_PUSH
9685 xmlGenericError(xmlGenericErrorContext,
9686 "PP: entering MISC\n");
9687#endif
9688 }
9689 } else {
9690 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9691 ctxt->sax->setDocumentLocator(ctxt->userData,
9692 &xmlDefaultSAXLocator);
9693 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009694 if (ctxt->version == NULL) {
9695 xmlErrMemory(ctxt, NULL);
9696 break;
9697 }
Owen Taylor3473f882001-02-23 17:55:21 +00009698 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9699 (!ctxt->disableSAX))
9700 ctxt->sax->startDocument(ctxt->userData);
9701 ctxt->instate = XML_PARSER_MISC;
9702#ifdef DEBUG_PUSH
9703 xmlGenericError(xmlGenericErrorContext,
9704 "PP: entering MISC\n");
9705#endif
9706 }
9707 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009708 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009709 const xmlChar *name;
9710 const xmlChar *prefix;
9711 const xmlChar *URI;
9712 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009713
9714 if ((avail < 2) && (ctxt->inputNr == 1))
9715 goto done;
9716 cur = ctxt->input->cur[0];
9717 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009718 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009719 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009720 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9721 ctxt->sax->endDocument(ctxt->userData);
9722 goto done;
9723 }
9724 if (!terminate) {
9725 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009726 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009727 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009728 goto done;
9729 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9730 goto done;
9731 }
9732 }
9733 if (ctxt->spaceNr == 0)
9734 spacePush(ctxt, -1);
9735 else
9736 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009737#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009738 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009739#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009740 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009741#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009742 else
9743 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009744#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009745 if (name == NULL) {
9746 spacePop(ctxt);
9747 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009748 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9749 ctxt->sax->endDocument(ctxt->userData);
9750 goto done;
9751 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009752#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009753 /*
9754 * [ VC: Root Element Type ]
9755 * The Name in the document type declaration must match
9756 * the element type of the root element.
9757 */
9758 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9759 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9760 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009761#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009762
9763 /*
9764 * Check for an Empty Element.
9765 */
9766 if ((RAW == '/') && (NXT(1) == '>')) {
9767 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009768
9769 if (ctxt->sax2) {
9770 if ((ctxt->sax != NULL) &&
9771 (ctxt->sax->endElementNs != NULL) &&
9772 (!ctxt->disableSAX))
9773 ctxt->sax->endElementNs(ctxt->userData, name,
9774 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009775 if (ctxt->nsNr - nsNr > 0)
9776 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009777#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009778 } else {
9779 if ((ctxt->sax != NULL) &&
9780 (ctxt->sax->endElement != NULL) &&
9781 (!ctxt->disableSAX))
9782 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009783#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009784 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009785 spacePop(ctxt);
9786 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009787 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009788 } else {
9789 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009790 }
9791 break;
9792 }
9793 if (RAW == '>') {
9794 NEXT;
9795 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009796 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009797 "Couldn't find end of Start Tag %s\n",
9798 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009799 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009800 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009801 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009802 if (ctxt->sax2)
9803 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009804#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009805 else
9806 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009807#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009808
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009810 break;
9811 }
9812 case XML_PARSER_CONTENT: {
9813 const xmlChar *test;
9814 unsigned int cons;
9815 if ((avail < 2) && (ctxt->inputNr == 1))
9816 goto done;
9817 cur = ctxt->input->cur[0];
9818 next = ctxt->input->cur[1];
9819
9820 test = CUR_PTR;
9821 cons = ctxt->input->consumed;
9822 if ((cur == '<') && (next == '/')) {
9823 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009824 break;
9825 } else if ((cur == '<') && (next == '?')) {
9826 if ((!terminate) &&
9827 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9828 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009829 xmlParsePI(ctxt);
9830 } else if ((cur == '<') && (next != '!')) {
9831 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009832 break;
9833 } else if ((cur == '<') && (next == '!') &&
9834 (ctxt->input->cur[2] == '-') &&
9835 (ctxt->input->cur[3] == '-')) {
9836 if ((!terminate) &&
9837 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9838 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009839 xmlParseComment(ctxt);
9840 ctxt->instate = XML_PARSER_CONTENT;
9841 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9842 (ctxt->input->cur[2] == '[') &&
9843 (ctxt->input->cur[3] == 'C') &&
9844 (ctxt->input->cur[4] == 'D') &&
9845 (ctxt->input->cur[5] == 'A') &&
9846 (ctxt->input->cur[6] == 'T') &&
9847 (ctxt->input->cur[7] == 'A') &&
9848 (ctxt->input->cur[8] == '[')) {
9849 SKIP(9);
9850 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009851 break;
9852 } else if ((cur == '<') && (next == '!') &&
9853 (avail < 9)) {
9854 goto done;
9855 } else if (cur == '&') {
9856 if ((!terminate) &&
9857 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9858 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009859 xmlParseReference(ctxt);
9860 } else {
9861 /* TODO Avoid the extra copy, handle directly !!! */
9862 /*
9863 * Goal of the following test is:
9864 * - minimize calls to the SAX 'character' callback
9865 * when they are mergeable
9866 * - handle a problem for isBlank when we only parse
9867 * a sequence of blank chars and the next one is
9868 * not available to check against '<' presence.
9869 * - tries to homogenize the differences in SAX
9870 * callbacks between the push and pull versions
9871 * of the parser.
9872 */
9873 if ((ctxt->inputNr == 1) &&
9874 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9875 if (!terminate) {
9876 if (ctxt->progressive) {
9877 if ((lastlt == NULL) ||
9878 (ctxt->input->cur > lastlt))
9879 goto done;
9880 } else if (xmlParseLookupSequence(ctxt,
9881 '<', 0, 0) < 0) {
9882 goto done;
9883 }
9884 }
9885 }
9886 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009887 xmlParseCharData(ctxt, 0);
9888 }
9889 /*
9890 * Pop-up of finished entities.
9891 */
9892 while ((RAW == 0) && (ctxt->inputNr > 1))
9893 xmlPopInput(ctxt);
9894 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009895 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9896 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009897 ctxt->instate = XML_PARSER_EOF;
9898 break;
9899 }
9900 break;
9901 }
9902 case XML_PARSER_END_TAG:
9903 if (avail < 2)
9904 goto done;
9905 if (!terminate) {
9906 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009907 /* > can be found unescaped in attribute values */
9908 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009909 goto done;
9910 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9911 goto done;
9912 }
9913 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009914 if (ctxt->sax2) {
9915 xmlParseEndTag2(ctxt,
9916 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9917 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009918 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009919 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009920 }
9921#ifdef LIBXML_SAX1_ENABLED
9922 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009923 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009924#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009925 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009926 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009927 } else {
9928 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009929 }
9930 break;
9931 case XML_PARSER_CDATA_SECTION: {
9932 /*
9933 * The Push mode need to have the SAX callback for
9934 * cdataBlock merge back contiguous callbacks.
9935 */
9936 int base;
9937
9938 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9939 if (base < 0) {
9940 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009941 int tmp;
9942
9943 tmp = xmlCheckCdataPush(ctxt->input->cur,
9944 XML_PARSER_BIG_BUFFER_SIZE);
9945 if (tmp < 0) {
9946 tmp = -tmp;
9947 ctxt->input->cur += tmp;
9948 goto encoding_error;
9949 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9951 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009952 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009953 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009954 else if (ctxt->sax->characters != NULL)
9955 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009956 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009957 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009958 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009959 ctxt->checkIndex = 0;
9960 }
9961 goto done;
9962 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009963 int tmp;
9964
9965 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9966 if ((tmp < 0) || (tmp != base)) {
9967 tmp = -tmp;
9968 ctxt->input->cur += tmp;
9969 goto encoding_error;
9970 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009971 if ((ctxt->sax != NULL) && (base > 0) &&
9972 (!ctxt->disableSAX)) {
9973 if (ctxt->sax->cdataBlock != NULL)
9974 ctxt->sax->cdataBlock(ctxt->userData,
9975 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009976 else if (ctxt->sax->characters != NULL)
9977 ctxt->sax->characters(ctxt->userData,
9978 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009979 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009980 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009981 ctxt->checkIndex = 0;
9982 ctxt->instate = XML_PARSER_CONTENT;
9983#ifdef DEBUG_PUSH
9984 xmlGenericError(xmlGenericErrorContext,
9985 "PP: entering CONTENT\n");
9986#endif
9987 }
9988 break;
9989 }
Owen Taylor3473f882001-02-23 17:55:21 +00009990 case XML_PARSER_MISC:
9991 SKIP_BLANKS;
9992 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009993 avail = ctxt->input->length -
9994 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009995 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009996 avail = ctxt->input->buf->buffer->use -
9997 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009998 if (avail < 2)
9999 goto done;
10000 cur = ctxt->input->cur[0];
10001 next = ctxt->input->cur[1];
10002 if ((cur == '<') && (next == '?')) {
10003 if ((!terminate) &&
10004 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10005 goto done;
10006#ifdef DEBUG_PUSH
10007 xmlGenericError(xmlGenericErrorContext,
10008 "PP: Parsing PI\n");
10009#endif
10010 xmlParsePI(ctxt);
10011 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010012 (ctxt->input->cur[2] == '-') &&
10013 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010014 if ((!terminate) &&
10015 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10016 goto done;
10017#ifdef DEBUG_PUSH
10018 xmlGenericError(xmlGenericErrorContext,
10019 "PP: Parsing Comment\n");
10020#endif
10021 xmlParseComment(ctxt);
10022 ctxt->instate = XML_PARSER_MISC;
10023 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010024 (ctxt->input->cur[2] == 'D') &&
10025 (ctxt->input->cur[3] == 'O') &&
10026 (ctxt->input->cur[4] == 'C') &&
10027 (ctxt->input->cur[5] == 'T') &&
10028 (ctxt->input->cur[6] == 'Y') &&
10029 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010030 (ctxt->input->cur[8] == 'E')) {
10031 if ((!terminate) &&
10032 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10033 goto done;
10034#ifdef DEBUG_PUSH
10035 xmlGenericError(xmlGenericErrorContext,
10036 "PP: Parsing internal subset\n");
10037#endif
10038 ctxt->inSubset = 1;
10039 xmlParseDocTypeDecl(ctxt);
10040 if (RAW == '[') {
10041 ctxt->instate = XML_PARSER_DTD;
10042#ifdef DEBUG_PUSH
10043 xmlGenericError(xmlGenericErrorContext,
10044 "PP: entering DTD\n");
10045#endif
10046 } else {
10047 /*
10048 * Create and update the external subset.
10049 */
10050 ctxt->inSubset = 2;
10051 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10052 (ctxt->sax->externalSubset != NULL))
10053 ctxt->sax->externalSubset(ctxt->userData,
10054 ctxt->intSubName, ctxt->extSubSystem,
10055 ctxt->extSubURI);
10056 ctxt->inSubset = 0;
10057 ctxt->instate = XML_PARSER_PROLOG;
10058#ifdef DEBUG_PUSH
10059 xmlGenericError(xmlGenericErrorContext,
10060 "PP: entering PROLOG\n");
10061#endif
10062 }
10063 } else if ((cur == '<') && (next == '!') &&
10064 (avail < 9)) {
10065 goto done;
10066 } else {
10067 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010068 ctxt->progressive = 1;
10069 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010070#ifdef DEBUG_PUSH
10071 xmlGenericError(xmlGenericErrorContext,
10072 "PP: entering START_TAG\n");
10073#endif
10074 }
10075 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010076 case XML_PARSER_PROLOG:
10077 SKIP_BLANKS;
10078 if (ctxt->input->buf == NULL)
10079 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10080 else
10081 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10082 if (avail < 2)
10083 goto done;
10084 cur = ctxt->input->cur[0];
10085 next = ctxt->input->cur[1];
10086 if ((cur == '<') && (next == '?')) {
10087 if ((!terminate) &&
10088 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10089 goto done;
10090#ifdef DEBUG_PUSH
10091 xmlGenericError(xmlGenericErrorContext,
10092 "PP: Parsing PI\n");
10093#endif
10094 xmlParsePI(ctxt);
10095 } else if ((cur == '<') && (next == '!') &&
10096 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10097 if ((!terminate) &&
10098 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10099 goto done;
10100#ifdef DEBUG_PUSH
10101 xmlGenericError(xmlGenericErrorContext,
10102 "PP: Parsing Comment\n");
10103#endif
10104 xmlParseComment(ctxt);
10105 ctxt->instate = XML_PARSER_PROLOG;
10106 } else if ((cur == '<') && (next == '!') &&
10107 (avail < 4)) {
10108 goto done;
10109 } else {
10110 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010111 if (ctxt->progressive == 0)
10112 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010113 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010114#ifdef DEBUG_PUSH
10115 xmlGenericError(xmlGenericErrorContext,
10116 "PP: entering START_TAG\n");
10117#endif
10118 }
10119 break;
10120 case XML_PARSER_EPILOG:
10121 SKIP_BLANKS;
10122 if (ctxt->input->buf == NULL)
10123 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10124 else
10125 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10126 if (avail < 2)
10127 goto done;
10128 cur = ctxt->input->cur[0];
10129 next = ctxt->input->cur[1];
10130 if ((cur == '<') && (next == '?')) {
10131 if ((!terminate) &&
10132 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10133 goto done;
10134#ifdef DEBUG_PUSH
10135 xmlGenericError(xmlGenericErrorContext,
10136 "PP: Parsing PI\n");
10137#endif
10138 xmlParsePI(ctxt);
10139 ctxt->instate = XML_PARSER_EPILOG;
10140 } else if ((cur == '<') && (next == '!') &&
10141 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10142 if ((!terminate) &&
10143 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10144 goto done;
10145#ifdef DEBUG_PUSH
10146 xmlGenericError(xmlGenericErrorContext,
10147 "PP: Parsing Comment\n");
10148#endif
10149 xmlParseComment(ctxt);
10150 ctxt->instate = XML_PARSER_EPILOG;
10151 } else if ((cur == '<') && (next == '!') &&
10152 (avail < 4)) {
10153 goto done;
10154 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010155 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010156 ctxt->instate = XML_PARSER_EOF;
10157#ifdef DEBUG_PUSH
10158 xmlGenericError(xmlGenericErrorContext,
10159 "PP: entering EOF\n");
10160#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010161 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010162 ctxt->sax->endDocument(ctxt->userData);
10163 goto done;
10164 }
10165 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010166 case XML_PARSER_DTD: {
10167 /*
10168 * Sorry but progressive parsing of the internal subset
10169 * is not expected to be supported. We first check that
10170 * the full content of the internal subset is available and
10171 * the parsing is launched only at that point.
10172 * Internal subset ends up with "']' S? '>'" in an unescaped
10173 * section and not in a ']]>' sequence which are conditional
10174 * sections (whoever argued to keep that crap in XML deserve
10175 * a place in hell !).
10176 */
10177 int base, i;
10178 xmlChar *buf;
10179 xmlChar quote = 0;
10180
10181 base = ctxt->input->cur - ctxt->input->base;
10182 if (base < 0) return(0);
10183 if (ctxt->checkIndex > base)
10184 base = ctxt->checkIndex;
10185 buf = ctxt->input->buf->buffer->content;
10186 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10187 base++) {
10188 if (quote != 0) {
10189 if (buf[base] == quote)
10190 quote = 0;
10191 continue;
10192 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010193 if ((quote == 0) && (buf[base] == '<')) {
10194 int found = 0;
10195 /* special handling of comments */
10196 if (((unsigned int) base + 4 <
10197 ctxt->input->buf->buffer->use) &&
10198 (buf[base + 1] == '!') &&
10199 (buf[base + 2] == '-') &&
10200 (buf[base + 3] == '-')) {
10201 for (;(unsigned int) base + 3 <
10202 ctxt->input->buf->buffer->use; base++) {
10203 if ((buf[base] == '-') &&
10204 (buf[base + 1] == '-') &&
10205 (buf[base + 2] == '>')) {
10206 found = 1;
10207 base += 2;
10208 break;
10209 }
10210 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010211 if (!found) {
10212#if 0
10213 fprintf(stderr, "unfinished comment\n");
10214#endif
10215 break; /* for */
10216 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010217 continue;
10218 }
10219 }
Owen Taylor3473f882001-02-23 17:55:21 +000010220 if (buf[base] == '"') {
10221 quote = '"';
10222 continue;
10223 }
10224 if (buf[base] == '\'') {
10225 quote = '\'';
10226 continue;
10227 }
10228 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010229#if 0
10230 fprintf(stderr, "%c%c%c%c: ", buf[base],
10231 buf[base + 1], buf[base + 2], buf[base + 3]);
10232#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010233 if ((unsigned int) base +1 >=
10234 ctxt->input->buf->buffer->use)
10235 break;
10236 if (buf[base + 1] == ']') {
10237 /* conditional crap, skip both ']' ! */
10238 base++;
10239 continue;
10240 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010241 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010242 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10243 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010244 if (buf[base + i] == '>') {
10245#if 0
10246 fprintf(stderr, "found\n");
10247#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010248 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010249 }
10250 if (!IS_BLANK_CH(buf[base + i])) {
10251#if 0
10252 fprintf(stderr, "not found\n");
10253#endif
10254 goto not_end_of_int_subset;
10255 }
Owen Taylor3473f882001-02-23 17:55:21 +000010256 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010257#if 0
10258 fprintf(stderr, "end of stream\n");
10259#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010260 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010261
Owen Taylor3473f882001-02-23 17:55:21 +000010262 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010263not_end_of_int_subset:
10264 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010265 }
10266 /*
10267 * We didn't find the end of the Internal subset
10268 */
Owen Taylor3473f882001-02-23 17:55:21 +000010269#ifdef DEBUG_PUSH
10270 if (next == 0)
10271 xmlGenericError(xmlGenericErrorContext,
10272 "PP: lookup of int subset end filed\n");
10273#endif
10274 goto done;
10275
10276found_end_int_subset:
10277 xmlParseInternalSubset(ctxt);
10278 ctxt->inSubset = 2;
10279 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10280 (ctxt->sax->externalSubset != NULL))
10281 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10282 ctxt->extSubSystem, ctxt->extSubURI);
10283 ctxt->inSubset = 0;
10284 ctxt->instate = XML_PARSER_PROLOG;
10285 ctxt->checkIndex = 0;
10286#ifdef DEBUG_PUSH
10287 xmlGenericError(xmlGenericErrorContext,
10288 "PP: entering PROLOG\n");
10289#endif
10290 break;
10291 }
10292 case XML_PARSER_COMMENT:
10293 xmlGenericError(xmlGenericErrorContext,
10294 "PP: internal error, state == COMMENT\n");
10295 ctxt->instate = XML_PARSER_CONTENT;
10296#ifdef DEBUG_PUSH
10297 xmlGenericError(xmlGenericErrorContext,
10298 "PP: entering CONTENT\n");
10299#endif
10300 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010301 case XML_PARSER_IGNORE:
10302 xmlGenericError(xmlGenericErrorContext,
10303 "PP: internal error, state == IGNORE");
10304 ctxt->instate = XML_PARSER_DTD;
10305#ifdef DEBUG_PUSH
10306 xmlGenericError(xmlGenericErrorContext,
10307 "PP: entering DTD\n");
10308#endif
10309 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010310 case XML_PARSER_PI:
10311 xmlGenericError(xmlGenericErrorContext,
10312 "PP: internal error, state == PI\n");
10313 ctxt->instate = XML_PARSER_CONTENT;
10314#ifdef DEBUG_PUSH
10315 xmlGenericError(xmlGenericErrorContext,
10316 "PP: entering CONTENT\n");
10317#endif
10318 break;
10319 case XML_PARSER_ENTITY_DECL:
10320 xmlGenericError(xmlGenericErrorContext,
10321 "PP: internal error, state == ENTITY_DECL\n");
10322 ctxt->instate = XML_PARSER_DTD;
10323#ifdef DEBUG_PUSH
10324 xmlGenericError(xmlGenericErrorContext,
10325 "PP: entering DTD\n");
10326#endif
10327 break;
10328 case XML_PARSER_ENTITY_VALUE:
10329 xmlGenericError(xmlGenericErrorContext,
10330 "PP: internal error, state == ENTITY_VALUE\n");
10331 ctxt->instate = XML_PARSER_CONTENT;
10332#ifdef DEBUG_PUSH
10333 xmlGenericError(xmlGenericErrorContext,
10334 "PP: entering DTD\n");
10335#endif
10336 break;
10337 case XML_PARSER_ATTRIBUTE_VALUE:
10338 xmlGenericError(xmlGenericErrorContext,
10339 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10340 ctxt->instate = XML_PARSER_START_TAG;
10341#ifdef DEBUG_PUSH
10342 xmlGenericError(xmlGenericErrorContext,
10343 "PP: entering START_TAG\n");
10344#endif
10345 break;
10346 case XML_PARSER_SYSTEM_LITERAL:
10347 xmlGenericError(xmlGenericErrorContext,
10348 "PP: internal error, state == SYSTEM_LITERAL\n");
10349 ctxt->instate = XML_PARSER_START_TAG;
10350#ifdef DEBUG_PUSH
10351 xmlGenericError(xmlGenericErrorContext,
10352 "PP: entering START_TAG\n");
10353#endif
10354 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010355 case XML_PARSER_PUBLIC_LITERAL:
10356 xmlGenericError(xmlGenericErrorContext,
10357 "PP: internal error, state == PUBLIC_LITERAL\n");
10358 ctxt->instate = XML_PARSER_START_TAG;
10359#ifdef DEBUG_PUSH
10360 xmlGenericError(xmlGenericErrorContext,
10361 "PP: entering START_TAG\n");
10362#endif
10363 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010364 }
10365 }
10366done:
10367#ifdef DEBUG_PUSH
10368 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10369#endif
10370 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010371encoding_error:
10372 {
10373 char buffer[150];
10374
10375 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10376 ctxt->input->cur[0], ctxt->input->cur[1],
10377 ctxt->input->cur[2], ctxt->input->cur[3]);
10378 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10379 "Input is not proper UTF-8, indicate encoding !\n%s",
10380 BAD_CAST buffer, NULL);
10381 }
10382 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010383}
10384
10385/**
Owen Taylor3473f882001-02-23 17:55:21 +000010386 * xmlParseChunk:
10387 * @ctxt: an XML parser context
10388 * @chunk: an char array
10389 * @size: the size in byte of the chunk
10390 * @terminate: last chunk indicator
10391 *
10392 * Parse a Chunk of memory
10393 *
10394 * Returns zero if no error, the xmlParserErrors otherwise.
10395 */
10396int
10397xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10398 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010399 if (ctxt == NULL)
10400 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010401 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010402 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010403 if (ctxt->instate == XML_PARSER_START)
10404 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010405 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10406 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10407 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10408 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010409 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010410
William M. Bracka3215c72004-07-31 16:24:01 +000010411 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10412 if (res < 0) {
10413 ctxt->errNo = XML_PARSER_EOF;
10414 ctxt->disableSAX = 1;
10415 return (XML_PARSER_EOF);
10416 }
Owen Taylor3473f882001-02-23 17:55:21 +000010417 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10418 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010419 ctxt->input->end =
10420 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010421#ifdef DEBUG_PUSH
10422 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10423#endif
10424
Owen Taylor3473f882001-02-23 17:55:21 +000010425 } else if (ctxt->instate != XML_PARSER_EOF) {
10426 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10427 xmlParserInputBufferPtr in = ctxt->input->buf;
10428 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10429 (in->raw != NULL)) {
10430 int nbchars;
10431
10432 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10433 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010434 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010435 xmlGenericError(xmlGenericErrorContext,
10436 "xmlParseChunk: encoder error\n");
10437 return(XML_ERR_INVALID_ENCODING);
10438 }
10439 }
10440 }
10441 }
10442 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010443 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010444 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010445 if (terminate) {
10446 /*
10447 * Check for termination
10448 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010449 int avail = 0;
10450
10451 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010452 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010453 avail = ctxt->input->length -
10454 (ctxt->input->cur - ctxt->input->base);
10455 else
10456 avail = ctxt->input->buf->buffer->use -
10457 (ctxt->input->cur - ctxt->input->base);
10458 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010459
Owen Taylor3473f882001-02-23 17:55:21 +000010460 if ((ctxt->instate != XML_PARSER_EOF) &&
10461 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010462 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010463 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010464 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010465 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010466 }
Owen Taylor3473f882001-02-23 17:55:21 +000010467 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010469 ctxt->sax->endDocument(ctxt->userData);
10470 }
10471 ctxt->instate = XML_PARSER_EOF;
10472 }
10473 return((xmlParserErrors) ctxt->errNo);
10474}
10475
10476/************************************************************************
10477 * *
10478 * I/O front end functions to the parser *
10479 * *
10480 ************************************************************************/
10481
10482/**
Owen Taylor3473f882001-02-23 17:55:21 +000010483 * xmlCreatePushParserCtxt:
10484 * @sax: a SAX handler
10485 * @user_data: The user data returned on SAX callbacks
10486 * @chunk: a pointer to an array of chars
10487 * @size: number of chars in the array
10488 * @filename: an optional file name or URI
10489 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010490 * Create a parser context for using the XML parser in push mode.
10491 * If @buffer and @size are non-NULL, the data is used to detect
10492 * the encoding. The remaining characters will be parsed so they
10493 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010494 * To allow content encoding detection, @size should be >= 4
10495 * The value of @filename is used for fetching external entities
10496 * and error/warning reports.
10497 *
10498 * Returns the new parser context or NULL
10499 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010500
Owen Taylor3473f882001-02-23 17:55:21 +000010501xmlParserCtxtPtr
10502xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10503 const char *chunk, int size, const char *filename) {
10504 xmlParserCtxtPtr ctxt;
10505 xmlParserInputPtr inputStream;
10506 xmlParserInputBufferPtr buf;
10507 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10508
10509 /*
10510 * plug some encoding conversion routines
10511 */
10512 if ((chunk != NULL) && (size >= 4))
10513 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10514
10515 buf = xmlAllocParserInputBuffer(enc);
10516 if (buf == NULL) return(NULL);
10517
10518 ctxt = xmlNewParserCtxt();
10519 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010520 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010521 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010522 return(NULL);
10523 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010524 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010525 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10526 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010527 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010528 xmlFreeParserInputBuffer(buf);
10529 xmlFreeParserCtxt(ctxt);
10530 return(NULL);
10531 }
Owen Taylor3473f882001-02-23 17:55:21 +000010532 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010533#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010534 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010535#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010536 xmlFree(ctxt->sax);
10537 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10538 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010539 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010540 xmlFreeParserInputBuffer(buf);
10541 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010542 return(NULL);
10543 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010544 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10545 if (sax->initialized == XML_SAX2_MAGIC)
10546 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10547 else
10548 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010549 if (user_data != NULL)
10550 ctxt->userData = user_data;
10551 }
10552 if (filename == NULL) {
10553 ctxt->directory = NULL;
10554 } else {
10555 ctxt->directory = xmlParserGetDirectory(filename);
10556 }
10557
10558 inputStream = xmlNewInputStream(ctxt);
10559 if (inputStream == NULL) {
10560 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010561 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010562 return(NULL);
10563 }
10564
10565 if (filename == NULL)
10566 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010567 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010568 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010569 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010570 if (inputStream->filename == NULL) {
10571 xmlFreeParserCtxt(ctxt);
10572 xmlFreeParserInputBuffer(buf);
10573 return(NULL);
10574 }
10575 }
Owen Taylor3473f882001-02-23 17:55:21 +000010576 inputStream->buf = buf;
10577 inputStream->base = inputStream->buf->buffer->content;
10578 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010579 inputStream->end =
10580 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010581
10582 inputPush(ctxt, inputStream);
10583
William M. Brack3a1cd212005-02-11 14:35:54 +000010584 /*
10585 * If the caller didn't provide an initial 'chunk' for determining
10586 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10587 * that it can be automatically determined later
10588 */
10589 if ((size == 0) || (chunk == NULL)) {
10590 ctxt->charset = XML_CHAR_ENCODING_NONE;
10591 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010592 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10593 int cur = ctxt->input->cur - ctxt->input->base;
10594
Owen Taylor3473f882001-02-23 17:55:21 +000010595 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010596
10597 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10598 ctxt->input->cur = ctxt->input->base + cur;
10599 ctxt->input->end =
10600 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010601#ifdef DEBUG_PUSH
10602 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10603#endif
10604 }
10605
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010606 if (enc != XML_CHAR_ENCODING_NONE) {
10607 xmlSwitchEncoding(ctxt, enc);
10608 }
10609
Owen Taylor3473f882001-02-23 17:55:21 +000010610 return(ctxt);
10611}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010612#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010613
10614/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010615 * xmlStopParser:
10616 * @ctxt: an XML parser context
10617 *
10618 * Blocks further parser processing
10619 */
10620void
10621xmlStopParser(xmlParserCtxtPtr ctxt) {
10622 if (ctxt == NULL)
10623 return;
10624 ctxt->instate = XML_PARSER_EOF;
10625 ctxt->disableSAX = 1;
10626 if (ctxt->input != NULL) {
10627 ctxt->input->cur = BAD_CAST"";
10628 ctxt->input->base = ctxt->input->cur;
10629 }
10630}
10631
10632/**
Owen Taylor3473f882001-02-23 17:55:21 +000010633 * xmlCreateIOParserCtxt:
10634 * @sax: a SAX handler
10635 * @user_data: The user data returned on SAX callbacks
10636 * @ioread: an I/O read function
10637 * @ioclose: an I/O close function
10638 * @ioctx: an I/O handler
10639 * @enc: the charset encoding if known
10640 *
10641 * Create a parser context for using the XML parser with an existing
10642 * I/O stream
10643 *
10644 * Returns the new parser context or NULL
10645 */
10646xmlParserCtxtPtr
10647xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10648 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10649 void *ioctx, xmlCharEncoding enc) {
10650 xmlParserCtxtPtr ctxt;
10651 xmlParserInputPtr inputStream;
10652 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010653
10654 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010655
10656 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10657 if (buf == NULL) return(NULL);
10658
10659 ctxt = xmlNewParserCtxt();
10660 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010661 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010662 return(NULL);
10663 }
10664 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010665#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010666 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010667#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010668 xmlFree(ctxt->sax);
10669 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10670 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010671 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010672 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010673 return(NULL);
10674 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010675 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10676 if (sax->initialized == XML_SAX2_MAGIC)
10677 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10678 else
10679 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010680 if (user_data != NULL)
10681 ctxt->userData = user_data;
10682 }
10683
10684 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10685 if (inputStream == NULL) {
10686 xmlFreeParserCtxt(ctxt);
10687 return(NULL);
10688 }
10689 inputPush(ctxt, inputStream);
10690
10691 return(ctxt);
10692}
10693
Daniel Veillard4432df22003-09-28 18:58:27 +000010694#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010695/************************************************************************
10696 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010697 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010698 * *
10699 ************************************************************************/
10700
10701/**
10702 * xmlIOParseDTD:
10703 * @sax: the SAX handler block or NULL
10704 * @input: an Input Buffer
10705 * @enc: the charset encoding if known
10706 *
10707 * Load and parse a DTD
10708 *
10709 * Returns the resulting xmlDtdPtr or NULL in case of error.
10710 * @input will be freed at parsing end.
10711 */
10712
10713xmlDtdPtr
10714xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10715 xmlCharEncoding enc) {
10716 xmlDtdPtr ret = NULL;
10717 xmlParserCtxtPtr ctxt;
10718 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010719 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010720
10721 if (input == NULL)
10722 return(NULL);
10723
10724 ctxt = xmlNewParserCtxt();
10725 if (ctxt == NULL) {
10726 return(NULL);
10727 }
10728
10729 /*
10730 * Set-up the SAX context
10731 */
10732 if (sax != NULL) {
10733 if (ctxt->sax != NULL)
10734 xmlFree(ctxt->sax);
10735 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010736 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010737 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010738 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010739
10740 /*
10741 * generate a parser input from the I/O handler
10742 */
10743
Daniel Veillard43caefb2003-12-07 19:32:22 +000010744 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010745 if (pinput == NULL) {
10746 if (sax != NULL) ctxt->sax = NULL;
10747 xmlFreeParserCtxt(ctxt);
10748 return(NULL);
10749 }
10750
10751 /*
10752 * plug some encoding conversion routines here.
10753 */
10754 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010755 if (enc != XML_CHAR_ENCODING_NONE) {
10756 xmlSwitchEncoding(ctxt, enc);
10757 }
Owen Taylor3473f882001-02-23 17:55:21 +000010758
10759 pinput->filename = NULL;
10760 pinput->line = 1;
10761 pinput->col = 1;
10762 pinput->base = ctxt->input->cur;
10763 pinput->cur = ctxt->input->cur;
10764 pinput->free = NULL;
10765
10766 /*
10767 * let's parse that entity knowing it's an external subset.
10768 */
10769 ctxt->inSubset = 2;
10770 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10771 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10772 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010773
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010774 if ((enc == XML_CHAR_ENCODING_NONE) &&
10775 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010776 /*
10777 * Get the 4 first bytes and decode the charset
10778 * if enc != XML_CHAR_ENCODING_NONE
10779 * plug some encoding conversion routines.
10780 */
10781 start[0] = RAW;
10782 start[1] = NXT(1);
10783 start[2] = NXT(2);
10784 start[3] = NXT(3);
10785 enc = xmlDetectCharEncoding(start, 4);
10786 if (enc != XML_CHAR_ENCODING_NONE) {
10787 xmlSwitchEncoding(ctxt, enc);
10788 }
10789 }
10790
Owen Taylor3473f882001-02-23 17:55:21 +000010791 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10792
10793 if (ctxt->myDoc != NULL) {
10794 if (ctxt->wellFormed) {
10795 ret = ctxt->myDoc->extSubset;
10796 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010797 if (ret != NULL) {
10798 xmlNodePtr tmp;
10799
10800 ret->doc = NULL;
10801 tmp = ret->children;
10802 while (tmp != NULL) {
10803 tmp->doc = NULL;
10804 tmp = tmp->next;
10805 }
10806 }
Owen Taylor3473f882001-02-23 17:55:21 +000010807 } else {
10808 ret = NULL;
10809 }
10810 xmlFreeDoc(ctxt->myDoc);
10811 ctxt->myDoc = NULL;
10812 }
10813 if (sax != NULL) ctxt->sax = NULL;
10814 xmlFreeParserCtxt(ctxt);
10815
10816 return(ret);
10817}
10818
10819/**
10820 * xmlSAXParseDTD:
10821 * @sax: the SAX handler block
10822 * @ExternalID: a NAME* containing the External ID of the DTD
10823 * @SystemID: a NAME* containing the URL to the DTD
10824 *
10825 * Load and parse an external subset.
10826 *
10827 * Returns the resulting xmlDtdPtr or NULL in case of error.
10828 */
10829
10830xmlDtdPtr
10831xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10832 const xmlChar *SystemID) {
10833 xmlDtdPtr ret = NULL;
10834 xmlParserCtxtPtr ctxt;
10835 xmlParserInputPtr input = NULL;
10836 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010837 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010838
10839 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10840
10841 ctxt = xmlNewParserCtxt();
10842 if (ctxt == NULL) {
10843 return(NULL);
10844 }
10845
10846 /*
10847 * Set-up the SAX context
10848 */
10849 if (sax != NULL) {
10850 if (ctxt->sax != NULL)
10851 xmlFree(ctxt->sax);
10852 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010853 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010854 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010855
10856 /*
10857 * Canonicalise the system ID
10858 */
10859 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010860 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010861 xmlFreeParserCtxt(ctxt);
10862 return(NULL);
10863 }
Owen Taylor3473f882001-02-23 17:55:21 +000010864
10865 /*
10866 * Ask the Entity resolver to load the damn thing
10867 */
10868
10869 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010870 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010871 if (input == NULL) {
10872 if (sax != NULL) ctxt->sax = NULL;
10873 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010874 if (systemIdCanonic != NULL)
10875 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010876 return(NULL);
10877 }
10878
10879 /*
10880 * plug some encoding conversion routines here.
10881 */
10882 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010883 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10884 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10885 xmlSwitchEncoding(ctxt, enc);
10886 }
Owen Taylor3473f882001-02-23 17:55:21 +000010887
10888 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010889 input->filename = (char *) systemIdCanonic;
10890 else
10891 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010892 input->line = 1;
10893 input->col = 1;
10894 input->base = ctxt->input->cur;
10895 input->cur = ctxt->input->cur;
10896 input->free = NULL;
10897
10898 /*
10899 * let's parse that entity knowing it's an external subset.
10900 */
10901 ctxt->inSubset = 2;
10902 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10903 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10904 ExternalID, SystemID);
10905 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10906
10907 if (ctxt->myDoc != NULL) {
10908 if (ctxt->wellFormed) {
10909 ret = ctxt->myDoc->extSubset;
10910 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010911 if (ret != NULL) {
10912 xmlNodePtr tmp;
10913
10914 ret->doc = NULL;
10915 tmp = ret->children;
10916 while (tmp != NULL) {
10917 tmp->doc = NULL;
10918 tmp = tmp->next;
10919 }
10920 }
Owen Taylor3473f882001-02-23 17:55:21 +000010921 } else {
10922 ret = NULL;
10923 }
10924 xmlFreeDoc(ctxt->myDoc);
10925 ctxt->myDoc = NULL;
10926 }
10927 if (sax != NULL) ctxt->sax = NULL;
10928 xmlFreeParserCtxt(ctxt);
10929
10930 return(ret);
10931}
10932
Daniel Veillard4432df22003-09-28 18:58:27 +000010933
Owen Taylor3473f882001-02-23 17:55:21 +000010934/**
10935 * xmlParseDTD:
10936 * @ExternalID: a NAME* containing the External ID of the DTD
10937 * @SystemID: a NAME* containing the URL to the DTD
10938 *
10939 * Load and parse an external subset.
10940 *
10941 * Returns the resulting xmlDtdPtr or NULL in case of error.
10942 */
10943
10944xmlDtdPtr
10945xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10946 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10947}
Daniel Veillard4432df22003-09-28 18:58:27 +000010948#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010949
10950/************************************************************************
10951 * *
10952 * Front ends when parsing an Entity *
10953 * *
10954 ************************************************************************/
10955
10956/**
Owen Taylor3473f882001-02-23 17:55:21 +000010957 * xmlParseCtxtExternalEntity:
10958 * @ctx: the existing parsing context
10959 * @URL: the URL for the entity to load
10960 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010961 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010962 *
10963 * Parse an external general entity within an existing parsing context
10964 * An external general parsed entity is well-formed if it matches the
10965 * production labeled extParsedEnt.
10966 *
10967 * [78] extParsedEnt ::= TextDecl? content
10968 *
10969 * Returns 0 if the entity is well formed, -1 in case of args problem and
10970 * the parser error code otherwise
10971 */
10972
10973int
10974xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010975 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010976 xmlParserCtxtPtr ctxt;
10977 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010978 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010979 xmlSAXHandlerPtr oldsax = NULL;
10980 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010981 xmlChar start[4];
10982 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010983
Daniel Veillardce682bc2004-11-05 17:22:25 +000010984 if (ctx == NULL) return(-1);
10985
Owen Taylor3473f882001-02-23 17:55:21 +000010986 if (ctx->depth > 40) {
10987 return(XML_ERR_ENTITY_LOOP);
10988 }
10989
Daniel Veillardcda96922001-08-21 10:56:31 +000010990 if (lst != NULL)
10991 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010992 if ((URL == NULL) && (ID == NULL))
10993 return(-1);
10994 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10995 return(-1);
10996
10997
10998 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10999 if (ctxt == NULL) return(-1);
11000 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011001 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011002 oldsax = ctxt->sax;
11003 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011004 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011005 newDoc = xmlNewDoc(BAD_CAST "1.0");
11006 if (newDoc == NULL) {
11007 xmlFreeParserCtxt(ctxt);
11008 return(-1);
11009 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011010 if (ctx->myDoc->dict) {
11011 newDoc->dict = ctx->myDoc->dict;
11012 xmlDictReference(newDoc->dict);
11013 }
Owen Taylor3473f882001-02-23 17:55:21 +000011014 if (ctx->myDoc != NULL) {
11015 newDoc->intSubset = ctx->myDoc->intSubset;
11016 newDoc->extSubset = ctx->myDoc->extSubset;
11017 }
11018 if (ctx->myDoc->URL != NULL) {
11019 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11020 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011021 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11022 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011023 ctxt->sax = oldsax;
11024 xmlFreeParserCtxt(ctxt);
11025 newDoc->intSubset = NULL;
11026 newDoc->extSubset = NULL;
11027 xmlFreeDoc(newDoc);
11028 return(-1);
11029 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011030 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011031 nodePush(ctxt, newDoc->children);
11032 if (ctx->myDoc == NULL) {
11033 ctxt->myDoc = newDoc;
11034 } else {
11035 ctxt->myDoc = ctx->myDoc;
11036 newDoc->children->doc = ctx->myDoc;
11037 }
11038
Daniel Veillard87a764e2001-06-20 17:41:10 +000011039 /*
11040 * Get the 4 first bytes and decode the charset
11041 * if enc != XML_CHAR_ENCODING_NONE
11042 * plug some encoding conversion routines.
11043 */
11044 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011045 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11046 start[0] = RAW;
11047 start[1] = NXT(1);
11048 start[2] = NXT(2);
11049 start[3] = NXT(3);
11050 enc = xmlDetectCharEncoding(start, 4);
11051 if (enc != XML_CHAR_ENCODING_NONE) {
11052 xmlSwitchEncoding(ctxt, enc);
11053 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011054 }
11055
Owen Taylor3473f882001-02-23 17:55:21 +000011056 /*
11057 * Parse a possible text declaration first
11058 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011059 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011060 xmlParseTextDecl(ctxt);
11061 }
11062
11063 /*
11064 * Doing validity checking on chunk doesn't make sense
11065 */
11066 ctxt->instate = XML_PARSER_CONTENT;
11067 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011068 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011069 ctxt->loadsubset = ctx->loadsubset;
11070 ctxt->depth = ctx->depth + 1;
11071 ctxt->replaceEntities = ctx->replaceEntities;
11072 if (ctxt->validate) {
11073 ctxt->vctxt.error = ctx->vctxt.error;
11074 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011075 } else {
11076 ctxt->vctxt.error = NULL;
11077 ctxt->vctxt.warning = NULL;
11078 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011079 ctxt->vctxt.nodeTab = NULL;
11080 ctxt->vctxt.nodeNr = 0;
11081 ctxt->vctxt.nodeMax = 0;
11082 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011083 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11084 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011085 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11086 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11087 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011088 ctxt->dictNames = ctx->dictNames;
11089 ctxt->attsDefault = ctx->attsDefault;
11090 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011091 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011092
11093 xmlParseContent(ctxt);
11094
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011095 ctx->validate = ctxt->validate;
11096 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011097 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011098 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011099 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011100 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011101 }
11102 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011104 }
11105
11106 if (!ctxt->wellFormed) {
11107 if (ctxt->errNo == 0)
11108 ret = 1;
11109 else
11110 ret = ctxt->errNo;
11111 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011112 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011113 xmlNodePtr cur;
11114
11115 /*
11116 * Return the newly created nodeset after unlinking it from
11117 * they pseudo parent.
11118 */
11119 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011120 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011121 while (cur != NULL) {
11122 cur->parent = NULL;
11123 cur = cur->next;
11124 }
11125 newDoc->children->children = NULL;
11126 }
11127 ret = 0;
11128 }
11129 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011130 ctxt->dict = NULL;
11131 ctxt->attsDefault = NULL;
11132 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011133 xmlFreeParserCtxt(ctxt);
11134 newDoc->intSubset = NULL;
11135 newDoc->extSubset = NULL;
11136 xmlFreeDoc(newDoc);
11137
11138 return(ret);
11139}
11140
11141/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011142 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011143 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011144 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011145 * @sax: the SAX handler bloc (possibly NULL)
11146 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11147 * @depth: Used for loop detection, use 0
11148 * @URL: the URL for the entity to load
11149 * @ID: the System ID for the entity to load
11150 * @list: the return value for the set of parsed nodes
11151 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011152 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011153 *
11154 * Returns 0 if the entity is well formed, -1 in case of args problem and
11155 * the parser error code otherwise
11156 */
11157
Daniel Veillard7d515752003-09-26 19:12:37 +000011158static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011159xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11160 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011161 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011162 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011163 xmlParserCtxtPtr ctxt;
11164 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011165 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011166 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011167 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011168 xmlChar start[4];
11169 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011170
11171 if (depth > 40) {
11172 return(XML_ERR_ENTITY_LOOP);
11173 }
11174
11175
11176
11177 if (list != NULL)
11178 *list = NULL;
11179 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011180 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011181 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011182 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011183
11184
11185 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011186 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011187 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011188 if (oldctxt != NULL) {
11189 ctxt->_private = oldctxt->_private;
11190 ctxt->loadsubset = oldctxt->loadsubset;
11191 ctxt->validate = oldctxt->validate;
11192 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011193 ctxt->record_info = oldctxt->record_info;
11194 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11195 ctxt->node_seq.length = oldctxt->node_seq.length;
11196 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011197 } else {
11198 /*
11199 * Doing validity checking on chunk without context
11200 * doesn't make sense
11201 */
11202 ctxt->_private = NULL;
11203 ctxt->validate = 0;
11204 ctxt->external = 2;
11205 ctxt->loadsubset = 0;
11206 }
Owen Taylor3473f882001-02-23 17:55:21 +000011207 if (sax != NULL) {
11208 oldsax = ctxt->sax;
11209 ctxt->sax = sax;
11210 if (user_data != NULL)
11211 ctxt->userData = user_data;
11212 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011213 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011214 newDoc = xmlNewDoc(BAD_CAST "1.0");
11215 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011216 ctxt->node_seq.maximum = 0;
11217 ctxt->node_seq.length = 0;
11218 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011219 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011220 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011221 }
11222 if (doc != NULL) {
11223 newDoc->intSubset = doc->intSubset;
11224 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011225 newDoc->dict = doc->dict;
11226 } else if (oldctxt != NULL) {
11227 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011228 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011229 xmlDictReference(newDoc->dict);
11230
Owen Taylor3473f882001-02-23 17:55:21 +000011231 if (doc->URL != NULL) {
11232 newDoc->URL = xmlStrdup(doc->URL);
11233 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011234 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11235 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011236 if (sax != NULL)
11237 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011238 ctxt->node_seq.maximum = 0;
11239 ctxt->node_seq.length = 0;
11240 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011241 xmlFreeParserCtxt(ctxt);
11242 newDoc->intSubset = NULL;
11243 newDoc->extSubset = NULL;
11244 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011245 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011246 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011247 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011248 nodePush(ctxt, newDoc->children);
11249 if (doc == NULL) {
11250 ctxt->myDoc = newDoc;
11251 } else {
11252 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011253 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011254 }
11255
Daniel Veillard87a764e2001-06-20 17:41:10 +000011256 /*
11257 * Get the 4 first bytes and decode the charset
11258 * if enc != XML_CHAR_ENCODING_NONE
11259 * plug some encoding conversion routines.
11260 */
11261 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011262 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11263 start[0] = RAW;
11264 start[1] = NXT(1);
11265 start[2] = NXT(2);
11266 start[3] = NXT(3);
11267 enc = xmlDetectCharEncoding(start, 4);
11268 if (enc != XML_CHAR_ENCODING_NONE) {
11269 xmlSwitchEncoding(ctxt, enc);
11270 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011271 }
11272
Owen Taylor3473f882001-02-23 17:55:21 +000011273 /*
11274 * Parse a possible text declaration first
11275 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011276 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011277 xmlParseTextDecl(ctxt);
11278 }
11279
Owen Taylor3473f882001-02-23 17:55:21 +000011280 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011281 ctxt->depth = depth;
11282
11283 xmlParseContent(ctxt);
11284
Daniel Veillard561b7f82002-03-20 21:55:57 +000011285 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011286 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011287 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011288 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011289 }
11290 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011291 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011292 }
11293
11294 if (!ctxt->wellFormed) {
11295 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011296 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011297 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011298 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011299 } else {
11300 if (list != NULL) {
11301 xmlNodePtr cur;
11302
11303 /*
11304 * Return the newly created nodeset after unlinking it from
11305 * they pseudo parent.
11306 */
11307 cur = newDoc->children->children;
11308 *list = cur;
11309 while (cur != NULL) {
11310 cur->parent = NULL;
11311 cur = cur->next;
11312 }
11313 newDoc->children->children = NULL;
11314 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011315 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011316 }
11317 if (sax != NULL)
11318 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011319 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11320 oldctxt->node_seq.length = ctxt->node_seq.length;
11321 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011322 ctxt->node_seq.maximum = 0;
11323 ctxt->node_seq.length = 0;
11324 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011325 xmlFreeParserCtxt(ctxt);
11326 newDoc->intSubset = NULL;
11327 newDoc->extSubset = NULL;
11328 xmlFreeDoc(newDoc);
11329
11330 return(ret);
11331}
11332
Daniel Veillard81273902003-09-30 00:43:48 +000011333#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011334/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011335 * xmlParseExternalEntity:
11336 * @doc: the document the chunk pertains to
11337 * @sax: the SAX handler bloc (possibly NULL)
11338 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11339 * @depth: Used for loop detection, use 0
11340 * @URL: the URL for the entity to load
11341 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011342 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011343 *
11344 * Parse an external general entity
11345 * An external general parsed entity is well-formed if it matches the
11346 * production labeled extParsedEnt.
11347 *
11348 * [78] extParsedEnt ::= TextDecl? content
11349 *
11350 * Returns 0 if the entity is well formed, -1 in case of args problem and
11351 * the parser error code otherwise
11352 */
11353
11354int
11355xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011356 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011357 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011358 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011359}
11360
11361/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011362 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011363 * @doc: the document the chunk pertains to
11364 * @sax: the SAX handler bloc (possibly NULL)
11365 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11366 * @depth: Used for loop detection, use 0
11367 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011368 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011369 *
11370 * Parse a well-balanced chunk of an XML document
11371 * called by the parser
11372 * The allowed sequence for the Well Balanced Chunk is the one defined by
11373 * the content production in the XML grammar:
11374 *
11375 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11376 *
11377 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11378 * the parser error code otherwise
11379 */
11380
11381int
11382xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011383 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011384 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11385 depth, string, lst, 0 );
11386}
Daniel Veillard81273902003-09-30 00:43:48 +000011387#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011388
11389/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011390 * xmlParseBalancedChunkMemoryInternal:
11391 * @oldctxt: the existing parsing context
11392 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11393 * @user_data: the user data field for the parser context
11394 * @lst: the return value for the set of parsed nodes
11395 *
11396 *
11397 * Parse a well-balanced chunk of an XML document
11398 * called by the parser
11399 * The allowed sequence for the Well Balanced Chunk is the one defined by
11400 * the content production in the XML grammar:
11401 *
11402 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11403 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011404 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11405 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011406 *
11407 * In case recover is set to 1, the nodelist will not be empty even if
11408 * the parsed chunk is not well balanced.
11409 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011410static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011411xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11412 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11413 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011414 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011415 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011416 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011417 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011418 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011419 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011420 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011421
11422 if (oldctxt->depth > 40) {
11423 return(XML_ERR_ENTITY_LOOP);
11424 }
11425
11426
11427 if (lst != NULL)
11428 *lst = NULL;
11429 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011430 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011431
11432 size = xmlStrlen(string);
11433
11434 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011435 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011436 if (user_data != NULL)
11437 ctxt->userData = user_data;
11438 else
11439 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011440 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11441 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011442 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11443 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11444 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011445
11446 oldsax = ctxt->sax;
11447 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011448 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011449 ctxt->replaceEntities = oldctxt->replaceEntities;
11450 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011451
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011452 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011453 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011454 newDoc = xmlNewDoc(BAD_CAST "1.0");
11455 if (newDoc == NULL) {
11456 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011457 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011458 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011459 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011460 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011461 newDoc->dict = ctxt->dict;
11462 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011463 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011464 } else {
11465 ctxt->myDoc = oldctxt->myDoc;
11466 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011467 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011468 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011469 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11470 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011471 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011472 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011473 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011474 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011475 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011476 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011477 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011478 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011479 ctxt->myDoc->children = NULL;
11480 ctxt->myDoc->last = NULL;
11481 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011482 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011483 ctxt->instate = XML_PARSER_CONTENT;
11484 ctxt->depth = oldctxt->depth + 1;
11485
Daniel Veillard328f48c2002-11-15 15:24:34 +000011486 ctxt->validate = 0;
11487 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011488 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11489 /*
11490 * ID/IDREF registration will be done in xmlValidateElement below
11491 */
11492 ctxt->loadsubset |= XML_SKIP_IDS;
11493 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011494 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011495 ctxt->attsDefault = oldctxt->attsDefault;
11496 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011497
Daniel Veillard68e9e742002-11-16 15:35:11 +000011498 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011499 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011500 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011501 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011502 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011503 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011504 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011505 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011506 }
11507
11508 if (!ctxt->wellFormed) {
11509 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011510 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011511 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011512 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011513 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011514 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011515 }
11516
William M. Brack7b9154b2003-09-27 19:23:50 +000011517 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011518 xmlNodePtr cur;
11519
11520 /*
11521 * Return the newly created nodeset after unlinking it from
11522 * they pseudo parent.
11523 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011524 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011525 *lst = cur;
11526 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011527#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011528 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11529 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11530 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011531 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11532 oldctxt->myDoc, cur);
11533 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011534#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011535 cur->parent = NULL;
11536 cur = cur->next;
11537 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011538 ctxt->myDoc->children->children = NULL;
11539 }
11540 if (ctxt->myDoc != NULL) {
11541 xmlFreeNode(ctxt->myDoc->children);
11542 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011543 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011544 }
11545
11546 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011547 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011548 ctxt->attsDefault = NULL;
11549 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011550 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011551 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011552 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011553 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011554
11555 return(ret);
11556}
11557
Daniel Veillard29b17482004-08-16 00:39:03 +000011558/**
11559 * xmlParseInNodeContext:
11560 * @node: the context node
11561 * @data: the input string
11562 * @datalen: the input string length in bytes
11563 * @options: a combination of xmlParserOption
11564 * @lst: the return value for the set of parsed nodes
11565 *
11566 * Parse a well-balanced chunk of an XML document
11567 * within the context (DTD, namespaces, etc ...) of the given node.
11568 *
11569 * The allowed sequence for the data is a Well Balanced Chunk defined by
11570 * the content production in the XML grammar:
11571 *
11572 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11573 *
11574 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11575 * error code otherwise
11576 */
11577xmlParserErrors
11578xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11579 int options, xmlNodePtr *lst) {
11580#ifdef SAX2
11581 xmlParserCtxtPtr ctxt;
11582 xmlDocPtr doc = NULL;
11583 xmlNodePtr fake, cur;
11584 int nsnr = 0;
11585
11586 xmlParserErrors ret = XML_ERR_OK;
11587
11588 /*
11589 * check all input parameters, grab the document
11590 */
11591 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11592 return(XML_ERR_INTERNAL_ERROR);
11593 switch (node->type) {
11594 case XML_ELEMENT_NODE:
11595 case XML_ATTRIBUTE_NODE:
11596 case XML_TEXT_NODE:
11597 case XML_CDATA_SECTION_NODE:
11598 case XML_ENTITY_REF_NODE:
11599 case XML_PI_NODE:
11600 case XML_COMMENT_NODE:
11601 case XML_DOCUMENT_NODE:
11602 case XML_HTML_DOCUMENT_NODE:
11603 break;
11604 default:
11605 return(XML_ERR_INTERNAL_ERROR);
11606
11607 }
11608 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11609 (node->type != XML_DOCUMENT_NODE) &&
11610 (node->type != XML_HTML_DOCUMENT_NODE))
11611 node = node->parent;
11612 if (node == NULL)
11613 return(XML_ERR_INTERNAL_ERROR);
11614 if (node->type == XML_ELEMENT_NODE)
11615 doc = node->doc;
11616 else
11617 doc = (xmlDocPtr) node;
11618 if (doc == NULL)
11619 return(XML_ERR_INTERNAL_ERROR);
11620
11621 /*
11622 * allocate a context and set-up everything not related to the
11623 * node position in the tree
11624 */
11625 if (doc->type == XML_DOCUMENT_NODE)
11626 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11627#ifdef LIBXML_HTML_ENABLED
11628 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11629 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11630#endif
11631 else
11632 return(XML_ERR_INTERNAL_ERROR);
11633
11634 if (ctxt == NULL)
11635 return(XML_ERR_NO_MEMORY);
11636 fake = xmlNewComment(NULL);
11637 if (fake == NULL) {
11638 xmlFreeParserCtxt(ctxt);
11639 return(XML_ERR_NO_MEMORY);
11640 }
11641 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011642
11643 /*
11644 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11645 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11646 * we must wait until the last moment to free the original one.
11647 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011648 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011649 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011650 xmlDictFree(ctxt->dict);
11651 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011652 } else
11653 options |= XML_PARSE_NODICT;
11654
11655 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011656 xmlDetectSAX2(ctxt);
11657 ctxt->myDoc = doc;
11658
11659 if (node->type == XML_ELEMENT_NODE) {
11660 nodePush(ctxt, node);
11661 /*
11662 * initialize the SAX2 namespaces stack
11663 */
11664 cur = node;
11665 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11666 xmlNsPtr ns = cur->nsDef;
11667 const xmlChar *iprefix, *ihref;
11668
11669 while (ns != NULL) {
11670 if (ctxt->dict) {
11671 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11672 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11673 } else {
11674 iprefix = ns->prefix;
11675 ihref = ns->href;
11676 }
11677
11678 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11679 nsPush(ctxt, iprefix, ihref);
11680 nsnr++;
11681 }
11682 ns = ns->next;
11683 }
11684 cur = cur->parent;
11685 }
11686 ctxt->instate = XML_PARSER_CONTENT;
11687 }
11688
11689 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11690 /*
11691 * ID/IDREF registration will be done in xmlValidateElement below
11692 */
11693 ctxt->loadsubset |= XML_SKIP_IDS;
11694 }
11695
11696 xmlParseContent(ctxt);
11697 nsPop(ctxt, nsnr);
11698 if ((RAW == '<') && (NXT(1) == '/')) {
11699 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11700 } else if (RAW != 0) {
11701 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11702 }
11703 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11704 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11705 ctxt->wellFormed = 0;
11706 }
11707
11708 if (!ctxt->wellFormed) {
11709 if (ctxt->errNo == 0)
11710 ret = XML_ERR_INTERNAL_ERROR;
11711 else
11712 ret = (xmlParserErrors)ctxt->errNo;
11713 } else {
11714 ret = XML_ERR_OK;
11715 }
11716
11717 /*
11718 * Return the newly created nodeset after unlinking it from
11719 * the pseudo sibling.
11720 */
11721
11722 cur = fake->next;
11723 fake->next = NULL;
11724 node->last = fake;
11725
11726 if (cur != NULL) {
11727 cur->prev = NULL;
11728 }
11729
11730 *lst = cur;
11731
11732 while (cur != NULL) {
11733 cur->parent = NULL;
11734 cur = cur->next;
11735 }
11736
11737 xmlUnlinkNode(fake);
11738 xmlFreeNode(fake);
11739
11740
11741 if (ret != XML_ERR_OK) {
11742 xmlFreeNodeList(*lst);
11743 *lst = NULL;
11744 }
William M. Brackc3f81342004-10-03 01:22:44 +000011745
William M. Brackb7b54de2004-10-06 16:38:01 +000011746 if (doc->dict != NULL)
11747 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011748 xmlFreeParserCtxt(ctxt);
11749
11750 return(ret);
11751#else /* !SAX2 */
11752 return(XML_ERR_INTERNAL_ERROR);
11753#endif
11754}
11755
Daniel Veillard81273902003-09-30 00:43:48 +000011756#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011757/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011758 * xmlParseBalancedChunkMemoryRecover:
11759 * @doc: the document the chunk pertains to
11760 * @sax: the SAX handler bloc (possibly NULL)
11761 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11762 * @depth: Used for loop detection, use 0
11763 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11764 * @lst: the return value for the set of parsed nodes
11765 * @recover: return nodes even if the data is broken (use 0)
11766 *
11767 *
11768 * Parse a well-balanced chunk of an XML document
11769 * called by the parser
11770 * The allowed sequence for the Well Balanced Chunk is the one defined by
11771 * the content production in the XML grammar:
11772 *
11773 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11774 *
11775 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11776 * the parser error code otherwise
11777 *
11778 * In case recover is set to 1, the nodelist will not be empty even if
11779 * the parsed chunk is not well balanced.
11780 */
11781int
11782xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11783 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11784 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011785 xmlParserCtxtPtr ctxt;
11786 xmlDocPtr newDoc;
11787 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011788 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011789 int size;
11790 int ret = 0;
11791
11792 if (depth > 40) {
11793 return(XML_ERR_ENTITY_LOOP);
11794 }
11795
11796
Daniel Veillardcda96922001-08-21 10:56:31 +000011797 if (lst != NULL)
11798 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011799 if (string == NULL)
11800 return(-1);
11801
11802 size = xmlStrlen(string);
11803
11804 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11805 if (ctxt == NULL) return(-1);
11806 ctxt->userData = ctxt;
11807 if (sax != NULL) {
11808 oldsax = ctxt->sax;
11809 ctxt->sax = sax;
11810 if (user_data != NULL)
11811 ctxt->userData = user_data;
11812 }
11813 newDoc = xmlNewDoc(BAD_CAST "1.0");
11814 if (newDoc == NULL) {
11815 xmlFreeParserCtxt(ctxt);
11816 return(-1);
11817 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011818 if ((doc != NULL) && (doc->dict != NULL)) {
11819 xmlDictFree(ctxt->dict);
11820 ctxt->dict = doc->dict;
11821 xmlDictReference(ctxt->dict);
11822 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11823 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11824 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11825 ctxt->dictNames = 1;
11826 } else {
11827 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11828 }
Owen Taylor3473f882001-02-23 17:55:21 +000011829 if (doc != NULL) {
11830 newDoc->intSubset = doc->intSubset;
11831 newDoc->extSubset = doc->extSubset;
11832 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011833 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11834 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011835 if (sax != NULL)
11836 ctxt->sax = oldsax;
11837 xmlFreeParserCtxt(ctxt);
11838 newDoc->intSubset = NULL;
11839 newDoc->extSubset = NULL;
11840 xmlFreeDoc(newDoc);
11841 return(-1);
11842 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011843 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11844 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011845 if (doc == NULL) {
11846 ctxt->myDoc = newDoc;
11847 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011848 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011849 newDoc->children->doc = doc;
11850 }
11851 ctxt->instate = XML_PARSER_CONTENT;
11852 ctxt->depth = depth;
11853
11854 /*
11855 * Doing validity checking on chunk doesn't make sense
11856 */
11857 ctxt->validate = 0;
11858 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011859 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011860
Daniel Veillardb39bc392002-10-26 19:29:51 +000011861 if ( doc != NULL ){
11862 content = doc->children;
11863 doc->children = NULL;
11864 xmlParseContent(ctxt);
11865 doc->children = content;
11866 }
11867 else {
11868 xmlParseContent(ctxt);
11869 }
Owen Taylor3473f882001-02-23 17:55:21 +000011870 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011871 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011872 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011873 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011874 }
11875 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011876 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011877 }
11878
11879 if (!ctxt->wellFormed) {
11880 if (ctxt->errNo == 0)
11881 ret = 1;
11882 else
11883 ret = ctxt->errNo;
11884 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011885 ret = 0;
11886 }
11887
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011888 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11889 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011890
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011891 /*
11892 * Return the newly created nodeset after unlinking it from
11893 * they pseudo parent.
11894 */
11895 cur = newDoc->children->children;
11896 *lst = cur;
11897 while (cur != NULL) {
11898 xmlSetTreeDoc(cur, doc);
11899 cur->parent = NULL;
11900 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011901 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011902 newDoc->children->children = NULL;
11903 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011904
Owen Taylor3473f882001-02-23 17:55:21 +000011905 if (sax != NULL)
11906 ctxt->sax = oldsax;
11907 xmlFreeParserCtxt(ctxt);
11908 newDoc->intSubset = NULL;
11909 newDoc->extSubset = NULL;
11910 xmlFreeDoc(newDoc);
11911
11912 return(ret);
11913}
11914
11915/**
11916 * xmlSAXParseEntity:
11917 * @sax: the SAX handler block
11918 * @filename: the filename
11919 *
11920 * parse an XML external entity out of context and build a tree.
11921 * It use the given SAX function block to handle the parsing callback.
11922 * If sax is NULL, fallback to the default DOM tree building routines.
11923 *
11924 * [78] extParsedEnt ::= TextDecl? content
11925 *
11926 * This correspond to a "Well Balanced" chunk
11927 *
11928 * Returns the resulting document tree
11929 */
11930
11931xmlDocPtr
11932xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11933 xmlDocPtr ret;
11934 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011935
11936 ctxt = xmlCreateFileParserCtxt(filename);
11937 if (ctxt == NULL) {
11938 return(NULL);
11939 }
11940 if (sax != NULL) {
11941 if (ctxt->sax != NULL)
11942 xmlFree(ctxt->sax);
11943 ctxt->sax = sax;
11944 ctxt->userData = NULL;
11945 }
11946
Owen Taylor3473f882001-02-23 17:55:21 +000011947 xmlParseExtParsedEnt(ctxt);
11948
11949 if (ctxt->wellFormed)
11950 ret = ctxt->myDoc;
11951 else {
11952 ret = NULL;
11953 xmlFreeDoc(ctxt->myDoc);
11954 ctxt->myDoc = NULL;
11955 }
11956 if (sax != NULL)
11957 ctxt->sax = NULL;
11958 xmlFreeParserCtxt(ctxt);
11959
11960 return(ret);
11961}
11962
11963/**
11964 * xmlParseEntity:
11965 * @filename: the filename
11966 *
11967 * parse an XML external entity out of context and build a tree.
11968 *
11969 * [78] extParsedEnt ::= TextDecl? content
11970 *
11971 * This correspond to a "Well Balanced" chunk
11972 *
11973 * Returns the resulting document tree
11974 */
11975
11976xmlDocPtr
11977xmlParseEntity(const char *filename) {
11978 return(xmlSAXParseEntity(NULL, filename));
11979}
Daniel Veillard81273902003-09-30 00:43:48 +000011980#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011981
11982/**
11983 * xmlCreateEntityParserCtxt:
11984 * @URL: the entity URL
11985 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011986 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011987 *
11988 * Create a parser context for an external entity
11989 * Automatic support for ZLIB/Compress compressed document is provided
11990 * by default if found at compile-time.
11991 *
11992 * Returns the new parser context or NULL
11993 */
11994xmlParserCtxtPtr
11995xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11996 const xmlChar *base) {
11997 xmlParserCtxtPtr ctxt;
11998 xmlParserInputPtr inputStream;
11999 char *directory = NULL;
12000 xmlChar *uri;
12001
12002 ctxt = xmlNewParserCtxt();
12003 if (ctxt == NULL) {
12004 return(NULL);
12005 }
12006
12007 uri = xmlBuildURI(URL, base);
12008
12009 if (uri == NULL) {
12010 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12011 if (inputStream == NULL) {
12012 xmlFreeParserCtxt(ctxt);
12013 return(NULL);
12014 }
12015
12016 inputPush(ctxt, inputStream);
12017
12018 if ((ctxt->directory == NULL) && (directory == NULL))
12019 directory = xmlParserGetDirectory((char *)URL);
12020 if ((ctxt->directory == NULL) && (directory != NULL))
12021 ctxt->directory = directory;
12022 } else {
12023 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12024 if (inputStream == NULL) {
12025 xmlFree(uri);
12026 xmlFreeParserCtxt(ctxt);
12027 return(NULL);
12028 }
12029
12030 inputPush(ctxt, inputStream);
12031
12032 if ((ctxt->directory == NULL) && (directory == NULL))
12033 directory = xmlParserGetDirectory((char *)uri);
12034 if ((ctxt->directory == NULL) && (directory != NULL))
12035 ctxt->directory = directory;
12036 xmlFree(uri);
12037 }
Owen Taylor3473f882001-02-23 17:55:21 +000012038 return(ctxt);
12039}
12040
12041/************************************************************************
12042 * *
12043 * Front ends when parsing from a file *
12044 * *
12045 ************************************************************************/
12046
12047/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012048 * xmlCreateURLParserCtxt:
12049 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012050 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012051 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012052 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012053 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012054 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012055 *
12056 * Returns the new parser context or NULL
12057 */
12058xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012059xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012060{
12061 xmlParserCtxtPtr ctxt;
12062 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012063 char *directory = NULL;
12064
Owen Taylor3473f882001-02-23 17:55:21 +000012065 ctxt = xmlNewParserCtxt();
12066 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012067 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012068 return(NULL);
12069 }
12070
Daniel Veillarddf292f72005-01-16 19:00:15 +000012071 if (options)
12072 xmlCtxtUseOptions(ctxt, options);
12073 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012074
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012075 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012076 if (inputStream == NULL) {
12077 xmlFreeParserCtxt(ctxt);
12078 return(NULL);
12079 }
12080
Owen Taylor3473f882001-02-23 17:55:21 +000012081 inputPush(ctxt, inputStream);
12082 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012083 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012084 if ((ctxt->directory == NULL) && (directory != NULL))
12085 ctxt->directory = directory;
12086
12087 return(ctxt);
12088}
12089
Daniel Veillard61b93382003-11-03 14:28:31 +000012090/**
12091 * xmlCreateFileParserCtxt:
12092 * @filename: the filename
12093 *
12094 * Create a parser context for a file content.
12095 * Automatic support for ZLIB/Compress compressed document is provided
12096 * by default if found at compile-time.
12097 *
12098 * Returns the new parser context or NULL
12099 */
12100xmlParserCtxtPtr
12101xmlCreateFileParserCtxt(const char *filename)
12102{
12103 return(xmlCreateURLParserCtxt(filename, 0));
12104}
12105
Daniel Veillard81273902003-09-30 00:43:48 +000012106#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012107/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012108 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012109 * @sax: the SAX handler block
12110 * @filename: the filename
12111 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12112 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012113 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012114 *
12115 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12116 * compressed document is provided by default if found at compile-time.
12117 * It use the given SAX function block to handle the parsing callback.
12118 * If sax is NULL, fallback to the default DOM tree building routines.
12119 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012120 * User data (void *) is stored within the parser context in the
12121 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012122 *
Owen Taylor3473f882001-02-23 17:55:21 +000012123 * Returns the resulting document tree
12124 */
12125
12126xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012127xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12128 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012129 xmlDocPtr ret;
12130 xmlParserCtxtPtr ctxt;
12131 char *directory = NULL;
12132
Daniel Veillard635ef722001-10-29 11:48:19 +000012133 xmlInitParser();
12134
Owen Taylor3473f882001-02-23 17:55:21 +000012135 ctxt = xmlCreateFileParserCtxt(filename);
12136 if (ctxt == NULL) {
12137 return(NULL);
12138 }
12139 if (sax != NULL) {
12140 if (ctxt->sax != NULL)
12141 xmlFree(ctxt->sax);
12142 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012143 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012144 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012145 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012146 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012147 }
Owen Taylor3473f882001-02-23 17:55:21 +000012148
12149 if ((ctxt->directory == NULL) && (directory == NULL))
12150 directory = xmlParserGetDirectory(filename);
12151 if ((ctxt->directory == NULL) && (directory != NULL))
12152 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12153
Daniel Veillarddad3f682002-11-17 16:47:27 +000012154 ctxt->recovery = recovery;
12155
Owen Taylor3473f882001-02-23 17:55:21 +000012156 xmlParseDocument(ctxt);
12157
William M. Brackc07329e2003-09-08 01:57:30 +000012158 if ((ctxt->wellFormed) || recovery) {
12159 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012160 if (ret != NULL) {
12161 if (ctxt->input->buf->compressed > 0)
12162 ret->compression = 9;
12163 else
12164 ret->compression = ctxt->input->buf->compressed;
12165 }
William M. Brackc07329e2003-09-08 01:57:30 +000012166 }
Owen Taylor3473f882001-02-23 17:55:21 +000012167 else {
12168 ret = NULL;
12169 xmlFreeDoc(ctxt->myDoc);
12170 ctxt->myDoc = NULL;
12171 }
12172 if (sax != NULL)
12173 ctxt->sax = NULL;
12174 xmlFreeParserCtxt(ctxt);
12175
12176 return(ret);
12177}
12178
12179/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012180 * xmlSAXParseFile:
12181 * @sax: the SAX handler block
12182 * @filename: the filename
12183 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12184 * documents
12185 *
12186 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12187 * compressed document is provided by default if found at compile-time.
12188 * It use the given SAX function block to handle the parsing callback.
12189 * If sax is NULL, fallback to the default DOM tree building routines.
12190 *
12191 * Returns the resulting document tree
12192 */
12193
12194xmlDocPtr
12195xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12196 int recovery) {
12197 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12198}
12199
12200/**
Owen Taylor3473f882001-02-23 17:55:21 +000012201 * xmlRecoverDoc:
12202 * @cur: a pointer to an array of xmlChar
12203 *
12204 * parse an XML in-memory document and build a tree.
12205 * In the case the document is not Well Formed, a tree is built anyway
12206 *
12207 * Returns the resulting document tree
12208 */
12209
12210xmlDocPtr
12211xmlRecoverDoc(xmlChar *cur) {
12212 return(xmlSAXParseDoc(NULL, cur, 1));
12213}
12214
12215/**
12216 * xmlParseFile:
12217 * @filename: the filename
12218 *
12219 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12220 * compressed document is provided by default if found at compile-time.
12221 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012222 * Returns the resulting document tree if the file was wellformed,
12223 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012224 */
12225
12226xmlDocPtr
12227xmlParseFile(const char *filename) {
12228 return(xmlSAXParseFile(NULL, filename, 0));
12229}
12230
12231/**
12232 * xmlRecoverFile:
12233 * @filename: the filename
12234 *
12235 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12236 * compressed document is provided by default if found at compile-time.
12237 * In the case the document is not Well Formed, a tree is built anyway
12238 *
12239 * Returns the resulting document tree
12240 */
12241
12242xmlDocPtr
12243xmlRecoverFile(const char *filename) {
12244 return(xmlSAXParseFile(NULL, filename, 1));
12245}
12246
12247
12248/**
12249 * xmlSetupParserForBuffer:
12250 * @ctxt: an XML parser context
12251 * @buffer: a xmlChar * buffer
12252 * @filename: a file name
12253 *
12254 * Setup the parser context to parse a new buffer; Clears any prior
12255 * contents from the parser context. The buffer parameter must not be
12256 * NULL, but the filename parameter can be
12257 */
12258void
12259xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12260 const char* filename)
12261{
12262 xmlParserInputPtr input;
12263
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012264 if ((ctxt == NULL) || (buffer == NULL))
12265 return;
12266
Owen Taylor3473f882001-02-23 17:55:21 +000012267 input = xmlNewInputStream(ctxt);
12268 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012269 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012270 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012271 return;
12272 }
12273
12274 xmlClearParserCtxt(ctxt);
12275 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012276 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012277 input->base = buffer;
12278 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012279 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012280 inputPush(ctxt, input);
12281}
12282
12283/**
12284 * xmlSAXUserParseFile:
12285 * @sax: a SAX handler
12286 * @user_data: The user data returned on SAX callbacks
12287 * @filename: a file name
12288 *
12289 * parse an XML file and call the given SAX handler routines.
12290 * Automatic support for ZLIB/Compress compressed document is provided
12291 *
12292 * Returns 0 in case of success or a error number otherwise
12293 */
12294int
12295xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12296 const char *filename) {
12297 int ret = 0;
12298 xmlParserCtxtPtr ctxt;
12299
12300 ctxt = xmlCreateFileParserCtxt(filename);
12301 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012302#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012303 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012304#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012305 xmlFree(ctxt->sax);
12306 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012307 xmlDetectSAX2(ctxt);
12308
Owen Taylor3473f882001-02-23 17:55:21 +000012309 if (user_data != NULL)
12310 ctxt->userData = user_data;
12311
12312 xmlParseDocument(ctxt);
12313
12314 if (ctxt->wellFormed)
12315 ret = 0;
12316 else {
12317 if (ctxt->errNo != 0)
12318 ret = ctxt->errNo;
12319 else
12320 ret = -1;
12321 }
12322 if (sax != NULL)
12323 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012324 if (ctxt->myDoc != NULL) {
12325 xmlFreeDoc(ctxt->myDoc);
12326 ctxt->myDoc = NULL;
12327 }
Owen Taylor3473f882001-02-23 17:55:21 +000012328 xmlFreeParserCtxt(ctxt);
12329
12330 return ret;
12331}
Daniel Veillard81273902003-09-30 00:43:48 +000012332#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012333
12334/************************************************************************
12335 * *
12336 * Front ends when parsing from memory *
12337 * *
12338 ************************************************************************/
12339
12340/**
12341 * xmlCreateMemoryParserCtxt:
12342 * @buffer: a pointer to a char array
12343 * @size: the size of the array
12344 *
12345 * Create a parser context for an XML in-memory document.
12346 *
12347 * Returns the new parser context or NULL
12348 */
12349xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012350xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012351 xmlParserCtxtPtr ctxt;
12352 xmlParserInputPtr input;
12353 xmlParserInputBufferPtr buf;
12354
12355 if (buffer == NULL)
12356 return(NULL);
12357 if (size <= 0)
12358 return(NULL);
12359
12360 ctxt = xmlNewParserCtxt();
12361 if (ctxt == NULL)
12362 return(NULL);
12363
Daniel Veillard53350552003-09-18 13:35:51 +000012364 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012365 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012366 if (buf == NULL) {
12367 xmlFreeParserCtxt(ctxt);
12368 return(NULL);
12369 }
Owen Taylor3473f882001-02-23 17:55:21 +000012370
12371 input = xmlNewInputStream(ctxt);
12372 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012373 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012374 xmlFreeParserCtxt(ctxt);
12375 return(NULL);
12376 }
12377
12378 input->filename = NULL;
12379 input->buf = buf;
12380 input->base = input->buf->buffer->content;
12381 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012382 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012383
12384 inputPush(ctxt, input);
12385 return(ctxt);
12386}
12387
Daniel Veillard81273902003-09-30 00:43:48 +000012388#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012389/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012390 * xmlSAXParseMemoryWithData:
12391 * @sax: the SAX handler block
12392 * @buffer: an pointer to a char array
12393 * @size: the size of the array
12394 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12395 * documents
12396 * @data: the userdata
12397 *
12398 * parse an XML in-memory block and use the given SAX function block
12399 * to handle the parsing callback. If sax is NULL, fallback to the default
12400 * DOM tree building routines.
12401 *
12402 * User data (void *) is stored within the parser context in the
12403 * context's _private member, so it is available nearly everywhere in libxml
12404 *
12405 * Returns the resulting document tree
12406 */
12407
12408xmlDocPtr
12409xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12410 int size, int recovery, void *data) {
12411 xmlDocPtr ret;
12412 xmlParserCtxtPtr ctxt;
12413
12414 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12415 if (ctxt == NULL) return(NULL);
12416 if (sax != NULL) {
12417 if (ctxt->sax != NULL)
12418 xmlFree(ctxt->sax);
12419 ctxt->sax = sax;
12420 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012421 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012422 if (data!=NULL) {
12423 ctxt->_private=data;
12424 }
12425
Daniel Veillardadba5f12003-04-04 16:09:01 +000012426 ctxt->recovery = recovery;
12427
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012428 xmlParseDocument(ctxt);
12429
12430 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12431 else {
12432 ret = NULL;
12433 xmlFreeDoc(ctxt->myDoc);
12434 ctxt->myDoc = NULL;
12435 }
12436 if (sax != NULL)
12437 ctxt->sax = NULL;
12438 xmlFreeParserCtxt(ctxt);
12439
12440 return(ret);
12441}
12442
12443/**
Owen Taylor3473f882001-02-23 17:55:21 +000012444 * xmlSAXParseMemory:
12445 * @sax: the SAX handler block
12446 * @buffer: an pointer to a char array
12447 * @size: the size of the array
12448 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12449 * documents
12450 *
12451 * parse an XML in-memory block and use the given SAX function block
12452 * to handle the parsing callback. If sax is NULL, fallback to the default
12453 * DOM tree building routines.
12454 *
12455 * Returns the resulting document tree
12456 */
12457xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012458xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12459 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012460 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012461}
12462
12463/**
12464 * xmlParseMemory:
12465 * @buffer: an pointer to a char array
12466 * @size: the size of the array
12467 *
12468 * parse an XML in-memory block and build a tree.
12469 *
12470 * Returns the resulting document tree
12471 */
12472
Daniel Veillard50822cb2001-07-26 20:05:51 +000012473xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012474 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12475}
12476
12477/**
12478 * xmlRecoverMemory:
12479 * @buffer: an pointer to a char array
12480 * @size: the size of the array
12481 *
12482 * parse an XML in-memory block and build a tree.
12483 * In the case the document is not Well Formed, a tree is built anyway
12484 *
12485 * Returns the resulting document tree
12486 */
12487
Daniel Veillard50822cb2001-07-26 20:05:51 +000012488xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012489 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12490}
12491
12492/**
12493 * xmlSAXUserParseMemory:
12494 * @sax: a SAX handler
12495 * @user_data: The user data returned on SAX callbacks
12496 * @buffer: an in-memory XML document input
12497 * @size: the length of the XML document in bytes
12498 *
12499 * A better SAX parsing routine.
12500 * parse an XML in-memory buffer and call the given SAX handler routines.
12501 *
12502 * Returns 0 in case of success or a error number otherwise
12503 */
12504int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012505 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012506 int ret = 0;
12507 xmlParserCtxtPtr ctxt;
12508 xmlSAXHandlerPtr oldsax = NULL;
12509
Daniel Veillard9e923512002-08-14 08:48:52 +000012510 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012511 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12512 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012513 oldsax = ctxt->sax;
12514 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012515 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012516 if (user_data != NULL)
12517 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012518
12519 xmlParseDocument(ctxt);
12520
12521 if (ctxt->wellFormed)
12522 ret = 0;
12523 else {
12524 if (ctxt->errNo != 0)
12525 ret = ctxt->errNo;
12526 else
12527 ret = -1;
12528 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012529 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012530 if (ctxt->myDoc != NULL) {
12531 xmlFreeDoc(ctxt->myDoc);
12532 ctxt->myDoc = NULL;
12533 }
Owen Taylor3473f882001-02-23 17:55:21 +000012534 xmlFreeParserCtxt(ctxt);
12535
12536 return ret;
12537}
Daniel Veillard81273902003-09-30 00:43:48 +000012538#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012539
12540/**
12541 * xmlCreateDocParserCtxt:
12542 * @cur: a pointer to an array of xmlChar
12543 *
12544 * Creates a parser context for an XML in-memory document.
12545 *
12546 * Returns the new parser context or NULL
12547 */
12548xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012549xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012550 int len;
12551
12552 if (cur == NULL)
12553 return(NULL);
12554 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012555 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012556}
12557
Daniel Veillard81273902003-09-30 00:43:48 +000012558#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012559/**
12560 * xmlSAXParseDoc:
12561 * @sax: the SAX handler block
12562 * @cur: a pointer to an array of xmlChar
12563 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12564 * documents
12565 *
12566 * parse an XML in-memory document and build a tree.
12567 * It use the given SAX function block to handle the parsing callback.
12568 * If sax is NULL, fallback to the default DOM tree building routines.
12569 *
12570 * Returns the resulting document tree
12571 */
12572
12573xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012574xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012575 xmlDocPtr ret;
12576 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012577 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012578
Daniel Veillard38936062004-11-04 17:45:11 +000012579 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012580
12581
12582 ctxt = xmlCreateDocParserCtxt(cur);
12583 if (ctxt == NULL) return(NULL);
12584 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012585 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012586 ctxt->sax = sax;
12587 ctxt->userData = NULL;
12588 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012589 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012590
12591 xmlParseDocument(ctxt);
12592 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12593 else {
12594 ret = NULL;
12595 xmlFreeDoc(ctxt->myDoc);
12596 ctxt->myDoc = NULL;
12597 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012598 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012599 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012600 xmlFreeParserCtxt(ctxt);
12601
12602 return(ret);
12603}
12604
12605/**
12606 * xmlParseDoc:
12607 * @cur: a pointer to an array of xmlChar
12608 *
12609 * parse an XML in-memory document and build a tree.
12610 *
12611 * Returns the resulting document tree
12612 */
12613
12614xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012615xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012616 return(xmlSAXParseDoc(NULL, cur, 0));
12617}
Daniel Veillard81273902003-09-30 00:43:48 +000012618#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012619
Daniel Veillard81273902003-09-30 00:43:48 +000012620#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012621/************************************************************************
12622 * *
12623 * Specific function to keep track of entities references *
12624 * and used by the XSLT debugger *
12625 * *
12626 ************************************************************************/
12627
12628static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12629
12630/**
12631 * xmlAddEntityReference:
12632 * @ent : A valid entity
12633 * @firstNode : A valid first node for children of entity
12634 * @lastNode : A valid last node of children entity
12635 *
12636 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12637 */
12638static void
12639xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12640 xmlNodePtr lastNode)
12641{
12642 if (xmlEntityRefFunc != NULL) {
12643 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12644 }
12645}
12646
12647
12648/**
12649 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012650 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012651 *
12652 * Set the function to call call back when a xml reference has been made
12653 */
12654void
12655xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12656{
12657 xmlEntityRefFunc = func;
12658}
Daniel Veillard81273902003-09-30 00:43:48 +000012659#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012660
12661/************************************************************************
12662 * *
12663 * Miscellaneous *
12664 * *
12665 ************************************************************************/
12666
12667#ifdef LIBXML_XPATH_ENABLED
12668#include <libxml/xpath.h>
12669#endif
12670
Daniel Veillardffa3c742005-07-21 13:24:09 +000012671extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012672static int xmlParserInitialized = 0;
12673
12674/**
12675 * xmlInitParser:
12676 *
12677 * Initialization function for the XML parser.
12678 * This is not reentrant. Call once before processing in case of
12679 * use in multithreaded programs.
12680 */
12681
12682void
12683xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012684 if (xmlParserInitialized != 0)
12685 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012686
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012687 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12688 (xmlGenericError == NULL))
12689 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012690 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012691 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012692 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012693 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012694 xmlDefaultSAXHandlerInit();
12695 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012696#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012697 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012698#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012699#ifdef LIBXML_HTML_ENABLED
12700 htmlInitAutoClose();
12701 htmlDefaultSAXHandlerInit();
12702#endif
12703#ifdef LIBXML_XPATH_ENABLED
12704 xmlXPathInit();
12705#endif
12706 xmlParserInitialized = 1;
12707}
12708
12709/**
12710 * xmlCleanupParser:
12711 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012712 * Cleanup function for the XML library. It tries to reclaim all
12713 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012714 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012715 * function should not prevent reusing the library but one should
12716 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012717 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012718 */
12719
12720void
12721xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012722 if (!xmlParserInitialized)
12723 return;
12724
Owen Taylor3473f882001-02-23 17:55:21 +000012725 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012726#ifdef LIBXML_CATALOG_ENABLED
12727 xmlCatalogCleanup();
12728#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012729 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012730 xmlCleanupInputCallbacks();
12731#ifdef LIBXML_OUTPUT_ENABLED
12732 xmlCleanupOutputCallbacks();
12733#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012734#ifdef LIBXML_SCHEMAS_ENABLED
12735 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012736 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012737#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012738 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012739 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012740 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012741 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012742 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012743}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012744
12745/************************************************************************
12746 * *
12747 * New set (2.6.0) of simpler and more flexible APIs *
12748 * *
12749 ************************************************************************/
12750
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL dict means the string is always freed.
 * Nothing is done for a NULL @str.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and stays safe inside an unbraced if/else; the caller
 * supplies the terminating semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
12762
12763/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012764 * xmlCtxtReset:
12765 * @ctxt: an XML parser context
12766 *
12767 * Reset a parser context
12768 */
12769void
12770xmlCtxtReset(xmlParserCtxtPtr ctxt)
12771{
12772 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012773 xmlDictPtr dict;
12774
12775 if (ctxt == NULL)
12776 return;
12777
12778 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012779
12780 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12781 xmlFreeInputStream(input);
12782 }
12783 ctxt->inputNr = 0;
12784 ctxt->input = NULL;
12785
12786 ctxt->spaceNr = 0;
12787 ctxt->spaceTab[0] = -1;
12788 ctxt->space = &ctxt->spaceTab[0];
12789
12790
12791 ctxt->nodeNr = 0;
12792 ctxt->node = NULL;
12793
12794 ctxt->nameNr = 0;
12795 ctxt->name = NULL;
12796
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012797 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012798 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012799 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012800 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012801 DICT_FREE(ctxt->directory);
12802 ctxt->directory = NULL;
12803 DICT_FREE(ctxt->extSubURI);
12804 ctxt->extSubURI = NULL;
12805 DICT_FREE(ctxt->extSubSystem);
12806 ctxt->extSubSystem = NULL;
12807 if (ctxt->myDoc != NULL)
12808 xmlFreeDoc(ctxt->myDoc);
12809 ctxt->myDoc = NULL;
12810
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012811 ctxt->standalone = -1;
12812 ctxt->hasExternalSubset = 0;
12813 ctxt->hasPErefs = 0;
12814 ctxt->html = 0;
12815 ctxt->external = 0;
12816 ctxt->instate = XML_PARSER_START;
12817 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012818
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012819 ctxt->wellFormed = 1;
12820 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012821 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012822 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012823#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012824 ctxt->vctxt.userData = ctxt;
12825 ctxt->vctxt.error = xmlParserValidityError;
12826 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012827#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012828 ctxt->record_info = 0;
12829 ctxt->nbChars = 0;
12830 ctxt->checkIndex = 0;
12831 ctxt->inSubset = 0;
12832 ctxt->errNo = XML_ERR_OK;
12833 ctxt->depth = 0;
12834 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12835 ctxt->catalogs = NULL;
12836 xmlInitNodeInfoSeq(&ctxt->node_seq);
12837
12838 if (ctxt->attsDefault != NULL) {
12839 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12840 ctxt->attsDefault = NULL;
12841 }
12842 if (ctxt->attsSpecial != NULL) {
12843 xmlHashFree(ctxt->attsSpecial, NULL);
12844 ctxt->attsSpecial = NULL;
12845 }
12846
Daniel Veillard4432df22003-09-28 18:58:27 +000012847#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012848 if (ctxt->catalogs != NULL)
12849 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012850#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012851 if (ctxt->lastError.code != XML_ERR_OK)
12852 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012853}
12854
12855/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012856 * xmlCtxtResetPush:
12857 * @ctxt: an XML parser context
12858 * @chunk: a pointer to an array of chars
12859 * @size: number of chars in the array
12860 * @filename: an optional file name or URI
12861 * @encoding: the document encoding, or NULL
12862 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012863 * Reset a push parser context
12864 *
12865 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012866 */
12867int
12868xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12869 int size, const char *filename, const char *encoding)
12870{
12871 xmlParserInputPtr inputStream;
12872 xmlParserInputBufferPtr buf;
12873 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12874
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012875 if (ctxt == NULL)
12876 return(1);
12877
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012878 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12879 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12880
12881 buf = xmlAllocParserInputBuffer(enc);
12882 if (buf == NULL)
12883 return(1);
12884
12885 if (ctxt == NULL) {
12886 xmlFreeParserInputBuffer(buf);
12887 return(1);
12888 }
12889
12890 xmlCtxtReset(ctxt);
12891
12892 if (ctxt->pushTab == NULL) {
12893 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12894 sizeof(xmlChar *));
12895 if (ctxt->pushTab == NULL) {
12896 xmlErrMemory(ctxt, NULL);
12897 xmlFreeParserInputBuffer(buf);
12898 return(1);
12899 }
12900 }
12901
12902 if (filename == NULL) {
12903 ctxt->directory = NULL;
12904 } else {
12905 ctxt->directory = xmlParserGetDirectory(filename);
12906 }
12907
12908 inputStream = xmlNewInputStream(ctxt);
12909 if (inputStream == NULL) {
12910 xmlFreeParserInputBuffer(buf);
12911 return(1);
12912 }
12913
12914 if (filename == NULL)
12915 inputStream->filename = NULL;
12916 else
12917 inputStream->filename = (char *)
12918 xmlCanonicPath((const xmlChar *) filename);
12919 inputStream->buf = buf;
12920 inputStream->base = inputStream->buf->buffer->content;
12921 inputStream->cur = inputStream->buf->buffer->content;
12922 inputStream->end =
12923 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12924
12925 inputPush(ctxt, inputStream);
12926
12927 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12928 (ctxt->input->buf != NULL)) {
12929 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12930 int cur = ctxt->input->cur - ctxt->input->base;
12931
12932 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12933
12934 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12935 ctxt->input->cur = ctxt->input->base + cur;
12936 ctxt->input->end =
12937 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12938 use];
12939#ifdef DEBUG_PUSH
12940 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12941#endif
12942 }
12943
12944 if (encoding != NULL) {
12945 xmlCharEncodingHandlerPtr hdlr;
12946
12947 hdlr = xmlFindCharEncodingHandler(encoding);
12948 if (hdlr != NULL) {
12949 xmlSwitchToEncoding(ctxt, hdlr);
12950 } else {
12951 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12952 "Unsupported encoding %s\n", BAD_CAST encoding);
12953 }
12954 } else if (enc != XML_CHAR_ENCODING_NONE) {
12955 xmlSwitchEncoding(ctxt, enc);
12956 }
12957
12958 return(0);
12959}
12960
12961/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012962 * xmlCtxtUseOptions:
12963 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012964 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012965 *
12966 * Applies the options to the parser context
12967 *
12968 * Returns 0 in case of success, the set of unknown or unimplemented options
12969 * in case of error.
12970 */
12971int
12972xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12973{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012974 if (ctxt == NULL)
12975 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012976 if (options & XML_PARSE_RECOVER) {
12977 ctxt->recovery = 1;
12978 options -= XML_PARSE_RECOVER;
12979 } else
12980 ctxt->recovery = 0;
12981 if (options & XML_PARSE_DTDLOAD) {
12982 ctxt->loadsubset = XML_DETECT_IDS;
12983 options -= XML_PARSE_DTDLOAD;
12984 } else
12985 ctxt->loadsubset = 0;
12986 if (options & XML_PARSE_DTDATTR) {
12987 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12988 options -= XML_PARSE_DTDATTR;
12989 }
12990 if (options & XML_PARSE_NOENT) {
12991 ctxt->replaceEntities = 1;
12992 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12993 options -= XML_PARSE_NOENT;
12994 } else
12995 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012996 if (options & XML_PARSE_PEDANTIC) {
12997 ctxt->pedantic = 1;
12998 options -= XML_PARSE_PEDANTIC;
12999 } else
13000 ctxt->pedantic = 0;
13001 if (options & XML_PARSE_NOBLANKS) {
13002 ctxt->keepBlanks = 0;
13003 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13004 options -= XML_PARSE_NOBLANKS;
13005 } else
13006 ctxt->keepBlanks = 1;
13007 if (options & XML_PARSE_DTDVALID) {
13008 ctxt->validate = 1;
13009 if (options & XML_PARSE_NOWARNING)
13010 ctxt->vctxt.warning = NULL;
13011 if (options & XML_PARSE_NOERROR)
13012 ctxt->vctxt.error = NULL;
13013 options -= XML_PARSE_DTDVALID;
13014 } else
13015 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013016 if (options & XML_PARSE_NOWARNING) {
13017 ctxt->sax->warning = NULL;
13018 options -= XML_PARSE_NOWARNING;
13019 }
13020 if (options & XML_PARSE_NOERROR) {
13021 ctxt->sax->error = NULL;
13022 ctxt->sax->fatalError = NULL;
13023 options -= XML_PARSE_NOERROR;
13024 }
Daniel Veillard81273902003-09-30 00:43:48 +000013025#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013026 if (options & XML_PARSE_SAX1) {
13027 ctxt->sax->startElement = xmlSAX2StartElement;
13028 ctxt->sax->endElement = xmlSAX2EndElement;
13029 ctxt->sax->startElementNs = NULL;
13030 ctxt->sax->endElementNs = NULL;
13031 ctxt->sax->initialized = 1;
13032 options -= XML_PARSE_SAX1;
13033 }
Daniel Veillard81273902003-09-30 00:43:48 +000013034#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013035 if (options & XML_PARSE_NODICT) {
13036 ctxt->dictNames = 0;
13037 options -= XML_PARSE_NODICT;
13038 } else {
13039 ctxt->dictNames = 1;
13040 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013041 if (options & XML_PARSE_NOCDATA) {
13042 ctxt->sax->cdataBlock = NULL;
13043 options -= XML_PARSE_NOCDATA;
13044 }
13045 if (options & XML_PARSE_NSCLEAN) {
13046 ctxt->options |= XML_PARSE_NSCLEAN;
13047 options -= XML_PARSE_NSCLEAN;
13048 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013049 if (options & XML_PARSE_NONET) {
13050 ctxt->options |= XML_PARSE_NONET;
13051 options -= XML_PARSE_NONET;
13052 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013053 if (options & XML_PARSE_COMPACT) {
13054 ctxt->options |= XML_PARSE_COMPACT;
13055 options -= XML_PARSE_COMPACT;
13056 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013057 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013058 return (options);
13059}
13060
13061/**
13062 * xmlDoRead:
13063 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013064 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013065 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013066 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013067 * @reuse: keep the context for reuse
13068 *
13069 * Common front-end for the xmlRead functions
13070 *
13071 * Returns the resulting document tree or NULL
13072 */
13073static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013074xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13075 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013076{
13077 xmlDocPtr ret;
13078
13079 xmlCtxtUseOptions(ctxt, options);
13080 if (encoding != NULL) {
13081 xmlCharEncodingHandlerPtr hdlr;
13082
13083 hdlr = xmlFindCharEncodingHandler(encoding);
13084 if (hdlr != NULL)
13085 xmlSwitchToEncoding(ctxt, hdlr);
13086 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013087 if ((URL != NULL) && (ctxt->input != NULL) &&
13088 (ctxt->input->filename == NULL))
13089 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013090 xmlParseDocument(ctxt);
13091 if ((ctxt->wellFormed) || ctxt->recovery)
13092 ret = ctxt->myDoc;
13093 else {
13094 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013095 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013096 xmlFreeDoc(ctxt->myDoc);
13097 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013098 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013099 ctxt->myDoc = NULL;
13100 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013101 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013102 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013103
13104 return (ret);
13105}
13106
13107/**
13108 * xmlReadDoc:
13109 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013110 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013111 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013112 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013113 *
13114 * parse an XML in-memory document and build a tree.
13115 *
13116 * Returns the resulting document tree
13117 */
13118xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013119xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013120{
13121 xmlParserCtxtPtr ctxt;
13122
13123 if (cur == NULL)
13124 return (NULL);
13125
13126 ctxt = xmlCreateDocParserCtxt(cur);
13127 if (ctxt == NULL)
13128 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013129 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013130}
13131
13132/**
13133 * xmlReadFile:
13134 * @filename: a file or URL
13135 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013136 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013137 *
13138 * parse an XML file from the filesystem or the network.
13139 *
13140 * Returns the resulting document tree
13141 */
13142xmlDocPtr
13143xmlReadFile(const char *filename, const char *encoding, int options)
13144{
13145 xmlParserCtxtPtr ctxt;
13146
Daniel Veillard61b93382003-11-03 14:28:31 +000013147 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013148 if (ctxt == NULL)
13149 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013150 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013151}
13152
13153/**
13154 * xmlReadMemory:
13155 * @buffer: a pointer to a char array
13156 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013157 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013158 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013159 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013160 *
13161 * parse an XML in-memory document and build a tree.
13162 *
13163 * Returns the resulting document tree
13164 */
13165xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013166xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013167{
13168 xmlParserCtxtPtr ctxt;
13169
13170 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13171 if (ctxt == NULL)
13172 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013173 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013174}
13175
13176/**
13177 * xmlReadFd:
13178 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013179 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013180 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013181 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013182 *
13183 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013184 * NOTE that the file descriptor will not be closed when the
13185 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013186 *
13187 * Returns the resulting document tree
13188 */
13189xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013190xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013191{
13192 xmlParserCtxtPtr ctxt;
13193 xmlParserInputBufferPtr input;
13194 xmlParserInputPtr stream;
13195
13196 if (fd < 0)
13197 return (NULL);
13198
13199 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13200 if (input == NULL)
13201 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013202 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013203 ctxt = xmlNewParserCtxt();
13204 if (ctxt == NULL) {
13205 xmlFreeParserInputBuffer(input);
13206 return (NULL);
13207 }
13208 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13209 if (stream == NULL) {
13210 xmlFreeParserInputBuffer(input);
13211 xmlFreeParserCtxt(ctxt);
13212 return (NULL);
13213 }
13214 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013215 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013216}
13217
13218/**
13219 * xmlReadIO:
13220 * @ioread: an I/O read function
13221 * @ioclose: an I/O close function
13222 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013223 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013224 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013225 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013226 *
13227 * parse an XML document from I/O functions and source and build a tree.
13228 *
13229 * Returns the resulting document tree
13230 */
13231xmlDocPtr
13232xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013233 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013234{
13235 xmlParserCtxtPtr ctxt;
13236 xmlParserInputBufferPtr input;
13237 xmlParserInputPtr stream;
13238
13239 if (ioread == NULL)
13240 return (NULL);
13241
13242 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13243 XML_CHAR_ENCODING_NONE);
13244 if (input == NULL)
13245 return (NULL);
13246 ctxt = xmlNewParserCtxt();
13247 if (ctxt == NULL) {
13248 xmlFreeParserInputBuffer(input);
13249 return (NULL);
13250 }
13251 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13252 if (stream == NULL) {
13253 xmlFreeParserInputBuffer(input);
13254 xmlFreeParserCtxt(ctxt);
13255 return (NULL);
13256 }
13257 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013258 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013259}
13260
13261/**
13262 * xmlCtxtReadDoc:
13263 * @ctxt: an XML parser context
13264 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013265 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013266 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013267 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013268 *
13269 * parse an XML in-memory document and build a tree.
13270 * This reuses the existing @ctxt parser context
13271 *
13272 * Returns the resulting document tree
13273 */
13274xmlDocPtr
13275xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013276 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013277{
13278 xmlParserInputPtr stream;
13279
13280 if (cur == NULL)
13281 return (NULL);
13282 if (ctxt == NULL)
13283 return (NULL);
13284
13285 xmlCtxtReset(ctxt);
13286
13287 stream = xmlNewStringInputStream(ctxt, cur);
13288 if (stream == NULL) {
13289 return (NULL);
13290 }
13291 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013292 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013293}
13294
13295/**
13296 * xmlCtxtReadFile:
13297 * @ctxt: an XML parser context
13298 * @filename: a file or URL
13299 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013300 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013301 *
13302 * parse an XML file from the filesystem or the network.
13303 * This reuses the existing @ctxt parser context
13304 *
13305 * Returns the resulting document tree
13306 */
13307xmlDocPtr
13308xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13309 const char *encoding, int options)
13310{
13311 xmlParserInputPtr stream;
13312
13313 if (filename == NULL)
13314 return (NULL);
13315 if (ctxt == NULL)
13316 return (NULL);
13317
13318 xmlCtxtReset(ctxt);
13319
Daniel Veillard29614c72004-11-26 10:47:26 +000013320 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013321 if (stream == NULL) {
13322 return (NULL);
13323 }
13324 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013325 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013326}
13327
13328/**
13329 * xmlCtxtReadMemory:
13330 * @ctxt: an XML parser context
13331 * @buffer: a pointer to a char array
13332 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013333 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013334 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013335 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013336 *
13337 * parse an XML in-memory document and build a tree.
13338 * This reuses the existing @ctxt parser context
13339 *
13340 * Returns the resulting document tree
13341 */
13342xmlDocPtr
13343xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013344 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013345{
13346 xmlParserInputBufferPtr input;
13347 xmlParserInputPtr stream;
13348
13349 if (ctxt == NULL)
13350 return (NULL);
13351 if (buffer == NULL)
13352 return (NULL);
13353
13354 xmlCtxtReset(ctxt);
13355
13356 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13357 if (input == NULL) {
13358 return(NULL);
13359 }
13360
13361 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13362 if (stream == NULL) {
13363 xmlFreeParserInputBuffer(input);
13364 return(NULL);
13365 }
13366
13367 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013368 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013369}
13370
13371/**
13372 * xmlCtxtReadFd:
13373 * @ctxt: an XML parser context
13374 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013375 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013376 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013377 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013378 *
13379 * parse an XML from a file descriptor and build a tree.
13380 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013381 * NOTE that the file descriptor will not be closed when the
13382 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013383 *
13384 * Returns the resulting document tree
13385 */
13386xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013387xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13388 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013389{
13390 xmlParserInputBufferPtr input;
13391 xmlParserInputPtr stream;
13392
13393 if (fd < 0)
13394 return (NULL);
13395 if (ctxt == NULL)
13396 return (NULL);
13397
13398 xmlCtxtReset(ctxt);
13399
13400
13401 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13402 if (input == NULL)
13403 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013404 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013405 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13406 if (stream == NULL) {
13407 xmlFreeParserInputBuffer(input);
13408 return (NULL);
13409 }
13410 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013411 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013412}
13413
13414/**
13415 * xmlCtxtReadIO:
13416 * @ctxt: an XML parser context
13417 * @ioread: an I/O read function
13418 * @ioclose: an I/O close function
13419 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013420 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013421 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013422 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013423 *
13424 * parse an XML document from I/O functions and source and build a tree.
13425 * This reuses the existing @ctxt parser context
13426 *
13427 * Returns the resulting document tree
13428 */
13429xmlDocPtr
13430xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13431 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013432 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013433 const char *encoding, int options)
13434{
13435 xmlParserInputBufferPtr input;
13436 xmlParserInputPtr stream;
13437
13438 if (ioread == NULL)
13439 return (NULL);
13440 if (ctxt == NULL)
13441 return (NULL);
13442
13443 xmlCtxtReset(ctxt);
13444
13445 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13446 XML_CHAR_ENCODING_NONE);
13447 if (input == NULL)
13448 return (NULL);
13449 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13450 if (stream == NULL) {
13451 xmlFreeParserInputBuffer(input);
13452 return (NULL);
13453 }
13454 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013455 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013456}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013457
13458#define bottom_parser
13459#include "elfgcchack.h"