blob: 459c5e9edc5b0fa9294f4eed1e4400016eec84c6 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Upper bound on the nesting depth of the XML documents the parser
 * accepts. The parser itself has no such limitation; the bound exists
 * purely as a safety measure.
 */
unsigned int xmlParserMaxDepth = 1024;

/* Select the SAX2 code paths in this file. */
#define SAX2 1

/* Sizes, in characters, of the large and the regular scratch buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string used for documents built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated. Both the strings and the pointers to
 * them are const: nothing is expected to modify this table at run time.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
 * Returns a non-zero value if the feature exists; returns zero (0) if
 * the feature does not exist or an unknown feature is requested.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
Daniel Veillard4432df22003-09-28 18:58:27 +0000951/**
952 * xmlCheckLanguageID:
953 * @lang: pointer to the string value
954 *
955 * Checks that the value conforms to the LanguageID production:
956 *
957 * NOTE: this is somewhat deprecated, those productions were removed from
958 * the XML Second edition.
959 *
960 * [33] LanguageID ::= Langcode ('-' Subcode)*
961 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
962 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
963 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
964 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
965 * [38] Subcode ::= ([a-z] | [A-Z])+
966 *
967 * Returns 1 if correct 0 otherwise
968 **/
969int
970xmlCheckLanguageID(const xmlChar * lang)
971{
972 const xmlChar *cur = lang;
973
974 if (cur == NULL)
975 return (0);
976 if (((cur[0] == 'i') && (cur[1] == '-')) ||
977 ((cur[0] == 'I') && (cur[1] == '-'))) {
978 /*
979 * IANA code
980 */
981 cur += 2;
982 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
983 ((cur[0] >= 'a') && (cur[0] <= 'z')))
984 cur++;
985 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
986 ((cur[0] == 'X') && (cur[1] == '-'))) {
987 /*
988 * User code
989 */
990 cur += 2;
991 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
992 ((cur[0] >= 'a') && (cur[0] <= 'z')))
993 cur++;
994 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
995 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
996 /*
997 * ISO639
998 */
999 cur++;
1000 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1001 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1002 cur++;
1003 else
1004 return (0);
1005 } else
1006 return (0);
1007 while (cur[0] != 0) { /* non input consuming */
1008 if (cur[0] != '-')
1009 return (0);
1010 cur++;
1011 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1012 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1013 cur++;
1014 else
1015 return (0);
1016 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1017 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1018 cur++;
1019 }
1020 return (1);
1021}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
1425/*
1426 * Macros for accessing the content. Those should be used only by the parser,
1427 * and not exported.
1428 *
1429 * Dirty macros, i.e. one often need to make assumption on the context to
1430 * use them
1431 *
1432 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1433 * To be used with extreme caution since operations consuming
1434 * characters may move the input buffer to a different location !
1435 * CUR returns the current xmlChar value, i.e. an 8-bit value.
1436 * This should be used internally by the parser
1437 * only to compare to ASCII values, otherwise it would break when
1438 * running with UTF-8 encoding.
1439 * RAW same as CUR but in the input buffer, bypass any token
1440 * extraction that may have been done
1441 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
1442 * to compare on ASCII based substring.
1443 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001444 * strings without newlines within the parser.
1445 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1446 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001447 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1448 *
1449 * NEXT Skip to the next character, this does the proper decoding
1450 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001451 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001452 * CUR_CHAR(l) returns the current unicode character (int), set l
1453 * to the number of xmlChars used for the encoding [0-5].
1454 * CUR_SCHAR same but operate on a string instead of the context
1455 * COPY_BUF copy the current unicode char to the target buffer, increment
1456 * the index
1457 * GROW, SHRINK handling of input buffers
1458 */
1459
/*
 * Raw accessors on the current input; all of them expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW		(*ctxt->input->cur)	/* current xmlChar */
#define CUR		(*ctxt->input->cur)	/* current xmlChar */
#define NXT(val)	(ctxt->input->cur[(val)]) /* xmlChar at offset val */
#define CUR_PTR		(ctxt->input->cur)	/* current read pointer */
1464
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Evaluation stops at the first mismatch, so a NUL-terminated
 * string shorter than n is never read past its terminator.
 * The pointer argument is parenthesized so that any expression can be
 * passed; note that it is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
1482
/*
 * SKIP(val): advance the current input by `val` xmlChars, keeping the
 * character count and the column in step.  The line counter is not touched,
 * so this must only be used over text known to contain no newline.
 * After the move a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds a 0 and
 * the input cannot be grown, the input is popped.
 * `val` is expanded three times and must be free of side effects.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1490
/*
 * SKIPL(val): advance the current input by `val` xmlChars one at a time,
 * maintaining line and column: a '\n' bumps the line and resets the column
 * to 1, anything else bumps the column.  Unlike SKIP() it is therefore safe
 * over text that may contain newlines.
 * After the move a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the new position holds a 0 and
 * the input cannot be grown, the input is popped.
 * `val` is parenthesized so any integer expression can be passed; it is
 * re-evaluated on every iteration and must be free of side effects.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1505
Daniel Veillarda880b122003-04-21 21:36:41 +00001506#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001507 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1508 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001509 xmlSHRINK (ctxt);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
Daniel Veillarda880b122003-04-21 21:36:41 +00001518#define GROW if ((ctxt->progressive == 0) && \
1519 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001520 xmlGROW (ctxt);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
/* Skip the blanks at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and the
 * character count but never the line counter, then grow the input if the
 * new position holds a 0.  The expansion is a brace block.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->input->col++;                                             \
        ctxt->nbChars++;                                                \
        if (ctxt->input->cur[0] == 0)                                   \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is `l` xmlChars long.
 * A '\n' bumps the line and resets the column to 1, anything else bumps
 * the column by one.  A '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    }                                                                   \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character of the context input; its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same as CUR_CHAR but reading from the string s. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i.  When l is 1 a single xmlChar is stored, otherwise the
 * character goes through xmlCopyCharMultiByte() and i advances by its
 * return value.  The expansion is an if/else without terminating ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);                     \
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the companion variable buffer_size and
 * reallocates `buffer` to that many xmlChars.  On allocation failure
 * `buffer` is left pointing at the still valid old block and control
 * jumps to a `mem_error` label which the calling function must provide.
 * The expansion is a brace block.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown;                                                     \
    buffer##_size *= 2;                                                 \
    grown = (xmlChar *)                                                 \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (grown == NULL) goto mem_error;                                  \
    buffer = grown;                                                     \
}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002180 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2181 growBuffer(buffer);
2182 }
Owen Taylor3473f882001-02-23 17:55:21 +00002183 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2184 if (xmlParserDebugEntities)
2185 xmlGenericError(xmlGenericErrorContext,
2186 "String decoding Entity Reference: %.30s\n",
2187 str);
2188 ent = xmlParseStringEntityRef(ctxt, &str);
2189 if ((ent != NULL) &&
2190 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2191 if (ent->content != NULL) {
2192 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002193 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2194 growBuffer(buffer);
2195 }
Owen Taylor3473f882001-02-23 17:55:21 +00002196 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002197 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2198 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002199 }
2200 } else if ((ent != NULL) && (ent->content != NULL)) {
2201 xmlChar *rep;
2202
2203 ctxt->depth++;
2204 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2205 0, 0, 0);
2206 ctxt->depth--;
2207 if (rep != NULL) {
2208 current = rep;
2209 while (*current != 0) { /* non input consuming loop */
2210 buffer[nbchars++] = *current++;
2211 if (nbchars >
2212 buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 }
2216 xmlFree(rep);
2217 }
2218 } else if (ent != NULL) {
2219 int i = xmlStrlen(ent->name);
2220 const xmlChar *cur = ent->name;
2221
2222 buffer[nbchars++] = '&';
2223 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2224 growBuffer(buffer);
2225 }
2226 for (;i > 0;i--)
2227 buffer[nbchars++] = *cur++;
2228 buffer[nbchars++] = ';';
2229 }
2230 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
2233 "String decoding PE Reference: %.30s\n", str);
2234 ent = xmlParseStringPEReference(ctxt, &str);
2235 if (ent != NULL) {
2236 xmlChar *rep;
2237
2238 ctxt->depth++;
2239 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2240 0, 0, 0);
2241 ctxt->depth--;
2242 if (rep != NULL) {
2243 current = rep;
2244 while (*current != 0) { /* non input consuming loop */
2245 buffer[nbchars++] = *current++;
2246 if (nbchars >
2247 buffer_size - XML_PARSER_BUFFER_SIZE) {
2248 growBuffer(buffer);
2249 }
2250 }
2251 xmlFree(rep);
2252 }
2253 }
2254 } else {
2255 COPY_BUF(l,buffer,nbchars,c);
2256 str += l;
2257 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2258 growBuffer(buffer);
2259 }
2260 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002261 if (str < last)
2262 c = CUR_SCHAR(str, l);
2263 else
2264 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266 buffer[nbchars++] = 0;
2267 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002268
2269mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272}
2273
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274/**
2275 * xmlStringDecodeEntities:
2276 * @ctxt: the parser context
2277 * @str: the input string
2278 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2279 * @end: an end marker xmlChar, 0 if none
2280 * @end2: an end marker xmlChar, 0 if none
2281 * @end3: an end marker xmlChar, 0 if none
2282 *
2283 * Takes a entity string content and process to do the adequate substitutions.
2284 *
2285 * [67] Reference ::= EntityRef | CharRef
2286 *
2287 * [69] PEReference ::= '%' Name ';'
2288 *
2289 * Returns A newly allocated string with the substitution done. The caller
2290 * must deallocate it !
2291 */
2292xmlChar *
2293xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2294 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002295 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002296 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2297 end, end2, end3));
2298}
Owen Taylor3473f882001-02-23 17:55:21 +00002299
2300/************************************************************************
2301 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002302 * Commodity functions, cleanup needed ? *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * areBlanks:
2308 * @ctxt: an XML parser context
2309 * @str: a xmlChar *
2310 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002311 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002312 *
2313 * Is this a sequence of blank chars that one can ignore ?
2314 *
2315 * Returns 1 if ignorable 0 otherwise.
2316 */
2317
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002318static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 int i, ret;
2321 xmlNodePtr lastChild;
2322
Daniel Veillard05c13a22001-09-09 08:38:09 +00002323 /*
2324 * Don't spend time trying to differentiate them, the same callback is
2325 * used !
2326 */
2327 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002328 return(0);
2329
Owen Taylor3473f882001-02-23 17:55:21 +00002330 /*
2331 * Check for xml:space value.
2332 */
2333 if (*(ctxt->space) == 1)
2334 return(0);
2335
2336 /*
2337 * Check that the string is made of blanks
2338 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002339 if (blank_chars == 0) {
2340 for (i = 0;i < len;i++)
2341 if (!(IS_BLANK_CH(str[i]))) return(0);
2342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343
2344 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002345 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002346 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002347 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (ctxt->myDoc != NULL) {
2349 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2350 if (ret == 0) return(1);
2351 if (ret == 1) return(0);
2352 }
2353
2354 /*
2355 * Otherwise, heuristic :-\
2356 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002357 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if ((ctxt->node->children == NULL) &&
2359 (RAW == '<') && (NXT(1) == '/')) return(0);
2360
2361 lastChild = xmlGetLastChild(ctxt->node);
2362 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002363 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2364 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if (xmlNodeIsText(lastChild))
2366 return(0);
2367 else if ((ctxt->node->children != NULL) &&
2368 (xmlNodeIsText(ctxt->node->children)))
2369 return(0);
2370 return(1);
2371}
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373/************************************************************************
2374 * *
2375 * Extra stuff for namespace support *
2376 * Relates to http://www.w3.org/TR/WD-xml-names *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * xmlSplitQName:
2382 * @ctxt: an XML parser context
2383 * @name: an XML parser context
2384 * @prefix: a xmlChar **
2385 *
2386 * parse an UTF8 encoded XML qualified name string
2387 *
2388 * [NS 5] QName ::= (Prefix ':')? LocalPart
2389 *
2390 * [NS 6] Prefix ::= NCName
2391 *
2392 * [NS 7] LocalPart ::= NCName
2393 *
2394 * Returns the local part, and prefix is updated
2395 * to get the Prefix if any.
2396 */
2397
2398xmlChar *
2399xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2400 xmlChar buf[XML_MAX_NAMELEN + 5];
2401 xmlChar *buffer = NULL;
2402 int len = 0;
2403 int max = XML_MAX_NAMELEN;
2404 xmlChar *ret = NULL;
2405 const xmlChar *cur = name;
2406 int c;
2407
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002408 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 *prefix = NULL;
2410
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002411 if (cur == NULL) return(NULL);
2412
Owen Taylor3473f882001-02-23 17:55:21 +00002413#ifndef XML_XML_NAMESPACE
2414 /* xml: prefix is not really a namespace */
2415 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2416 (cur[2] == 'l') && (cur[3] == ':'))
2417 return(xmlStrdup(name));
2418#endif
2419
Daniel Veillard597bc482003-07-24 16:08:28 +00002420 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if (cur[0] == ':')
2422 return(xmlStrdup(name));
2423
2424 c = *cur++;
2425 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2426 buf[len++] = c;
2427 c = *cur++;
2428 }
2429 if (len >= max) {
2430 /*
2431 * Okay someone managed to make a huge name, so he's ready to pay
2432 * for the processing speed.
2433 */
2434 max = len * 2;
2435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441 memcpy(buffer, buf, len);
2442 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2443 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002444 xmlChar *tmp;
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002447 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002448 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002449 if (tmp == NULL) {
2450 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(NULL);
2453 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002454 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 }
2456 buffer[len++] = c;
2457 c = *cur++;
2458 }
2459 buffer[len] = 0;
2460 }
2461
Daniel Veillard597bc482003-07-24 16:08:28 +00002462 /* nasty but well=formed
2463 if ((c == ':') && (*cur == 0)) {
2464 return(xmlStrdup(name));
2465 } */
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buffer == NULL)
2468 ret = xmlStrndup(buf, len);
2469 else {
2470 ret = buffer;
2471 buffer = NULL;
2472 max = XML_MAX_NAMELEN;
2473 }
2474
2475
2476 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002477 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002480 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002481 }
Owen Taylor3473f882001-02-23 17:55:21 +00002482 len = 0;
2483
Daniel Veillardbb284f42002-10-16 18:02:47 +00002484 /*
2485 * Check that the first character is proper to start
2486 * a new name
2487 */
2488 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2489 ((c >= 0x41) && (c <= 0x5A)) ||
2490 (c == '_') || (c == ':'))) {
2491 int l;
2492 int first = CUR_SCHAR(cur, l);
2493
2494 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002495 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002496 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002497 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002498 }
2499 }
2500 cur++;
2501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2503 buf[len++] = c;
2504 c = *cur++;
2505 }
2506 if (len >= max) {
2507 /*
2508 * Okay someone managed to make a huge name, so he's ready to pay
2509 * for the processing speed.
2510 */
2511 max = len * 2;
2512
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(NULL);
2517 }
2518 memcpy(buffer, buf, len);
2519 while (c != 0) { /* tested bigname2.xml */
2520 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002521 xmlChar *tmp;
2522
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002571 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002625 register const xmlChar *cmp = other;
2626 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002635 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002636 }
William M. Brack76e95df2003-10-18 16:20:14 +00002637 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638 /* success */
2639 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 /* failure (or end of input buffer), check with full function */
2643 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644 /* strings coming from the dictionnary direct compare possible */
2645 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002646 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647 }
2648 return ret;
2649}
2650
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002651static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002653 int len = 0, l;
2654 int c;
2655 int count = 0;
2656
2657 /*
2658 * Handler for more complex cases
2659 */
2660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 c = CUR_CHAR(l);
2662 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2663 (!IS_LETTER(c) && (c != '_') &&
2664 (c != ':'))) {
2665 return(NULL);
2666 }
2667
2668 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002669 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002670 (c == '.') || (c == '-') ||
2671 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002672 (IS_COMBINING(c)) ||
2673 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (count++ > 100) {
2675 count = 0;
2676 GROW;
2677 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 NEXTL(l);
2680 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard96688262005-08-23 18:14:12 +00002682 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2683 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002685}
2686
2687/**
2688 * xmlParseStringName:
2689 * @ctxt: an XML parser context
2690 * @str: a pointer to the string pointer (IN/OUT)
2691 *
2692 * parse an XML name.
2693 *
2694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2695 * CombiningChar | Extender
2696 *
2697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2698 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002699 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002702 * is updated to the current location in the string.
2703 */
2704
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002705static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002706xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2707 xmlChar buf[XML_MAX_NAMELEN + 5];
2708 const xmlChar *cur = *str;
2709 int len = 0, l;
2710 int c;
2711
2712 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002713 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002714 (c != ':')) {
2715 return(NULL);
2716 }
2717
William M. Brack871611b2003-10-18 04:53:14 +00002718 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002719 (c == '.') || (c == '-') ||
2720 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002721 (IS_COMBINING(c)) ||
2722 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723 COPY_BUF(l,buf,len,c);
2724 cur += l;
2725 c = CUR_SCHAR(cur, l);
2726 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2727 /*
2728 * Okay someone managed to make a huge name, so he's ready to pay
2729 * for the processing speed.
2730 */
2731 xmlChar *buffer;
2732 int max = len * 2;
2733
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002734 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002735 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(NULL);
2738 }
2739 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002741 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002742 (c == '.') || (c == '-') ||
2743 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002744 (IS_COMBINING(c)) ||
2745 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002749 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002750 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002753 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
2755 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 COPY_BUF(l,buffer,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 }
2762 buffer[len] = 0;
2763 *str = cur;
2764 return(buffer);
2765 }
2766 }
2767 *str = cur;
2768 return(xmlStrndup(buf, len));
2769}
2770
2771/**
2772 * xmlParseNmtoken:
2773 * @ctxt: an XML parser context
2774 *
2775 * parse an XML Nmtoken.
2776 *
2777 * [7] Nmtoken ::= (NameChar)+
2778 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002779 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002780 *
2781 * Returns the Nmtoken parsed or NULL
2782 */
2783
2784xmlChar *
2785xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2786 xmlChar buf[XML_MAX_NAMELEN + 5];
2787 int len = 0, l;
2788 int c;
2789 int count = 0;
2790
2791 GROW;
2792 c = CUR_CHAR(l);
2793
William M. Brack871611b2003-10-18 04:53:14 +00002794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 (c == '.') || (c == '-') ||
2796 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002797 (IS_COMBINING(c)) ||
2798 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 if (count++ > 100) {
2800 count = 0;
2801 GROW;
2802 }
2803 COPY_BUF(l,buf,len,c);
2804 NEXTL(l);
2805 c = CUR_CHAR(l);
2806 if (len >= XML_MAX_NAMELEN) {
2807 /*
2808 * Okay someone managed to make a huge token, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlChar *tmp;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002838 return(NULL);
2839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 }
2842 COPY_BUF(l,buffer,len,c);
2843 NEXTL(l);
2844 c = CUR_CHAR(l);
2845 }
2846 buffer[len] = 0;
2847 return(buffer);
2848 }
2849 }
2850 if (len == 0)
2851 return(NULL);
2852 return(xmlStrndup(buf, len));
2853}
2854
2855/**
2856 * xmlParseEntityValue:
2857 * @ctxt: an XML parser context
2858 * @orig: if non-NULL store a copy of the original entity value
2859 *
2860 * parse a value for ENTITY declarations
2861 *
2862 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2863 * "'" ([^%&'] | PEReference | Reference)* "'"
2864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002865 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002866 */
2867
2868xmlChar *
2869xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2870 xmlChar *buf = NULL;
2871 int len = 0;
2872 int size = XML_PARSER_BUFFER_SIZE;
2873 int c, l;
2874 xmlChar stop;
2875 xmlChar *ret = NULL;
2876 const xmlChar *cur = NULL;
2877 xmlParserInputPtr input;
2878
2879 if (RAW == '"') stop = '"';
2880 else if (RAW == '\'') stop = '\'';
2881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(NULL);
2884 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890
2891 /*
2892 * The content of the entity definition is copied in a buffer.
2893 */
2894
2895 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2896 input = ctxt->input;
2897 GROW;
2898 NEXT;
2899 c = CUR_CHAR(l);
2900 /*
2901 * NOTE: 4.4.5 Included in Literal
2902 * When a parameter entity reference appears in a literal entity
2903 * value, ... a single or double quote character in the replacement
2904 * text is always treated as a normal data character and will not
2905 * terminate the literal.
2906 * In practice it means we stop the loop only when back at parsing
2907 * the initial entity and the quote is found
2908 */
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 (ctxt->input != input))) {
2911 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlChar *tmp;
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002918 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002919 return(NULL);
2920 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002921 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
2923 COPY_BUF(l,buf,len,c);
2924 NEXTL(l);
2925 /*
2926 * Pop-up of finished entities.
2927 */
2928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2929 xmlPopInput(ctxt);
2930
2931 GROW;
2932 c = CUR_CHAR(l);
2933 if (c == 0) {
2934 GROW;
2935 c = CUR_CHAR(l);
2936 }
2937 }
2938 buf[len] = 0;
2939
2940 /*
2941 * Raise problem w.r.t. '&' and '%' being used in non-entities
2942 * reference constructs. Note Charref will be handled in
2943 * xmlStringDecodeEntities()
2944 */
2945 cur = buf;
2946 while (*cur != 0) { /* non input consuming */
2947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2948 xmlChar *name;
2949 xmlChar tmp = *cur;
2950
2951 cur++;
2952 name = xmlParseStringName(ctxt, &cur);
2953 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002956 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002958 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2959 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 }
2962 if (name != NULL)
2963 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002964 if (*cur == 0)
2965 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 cur++;
2968 }
2969
2970 /*
2971 * Then PEReference entities are substituted.
2972 */
2973 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002974 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002975 xmlFree(buf);
2976 } else {
2977 NEXT;
2978 /*
2979 * NOTE: 4.4.7 Bypassed
2980 * When a general entity reference appears in the EntityValue in
2981 * an entity declaration, it is bypassed and left as is.
2982 * so XML_SUBSTITUTE_REF is not set here.
2983 */
2984 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2985 0, 0, 0);
2986 if (orig != NULL)
2987 *orig = buf;
2988 else
2989 xmlFree(buf);
2990 }
2991
2992 return(ret);
2993}
2994
2995/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 * xmlParseAttValueComplex:
2997 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002998 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * parse a value for an attribute, this is the fallback function
3002 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003003 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003009 xmlChar limit = 0;
3010 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 int len = 0;
3012 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 xmlChar *current = NULL;
3015 xmlEntityPtr ent;
3016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (NXT(0) == '"') {
3018 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3019 limit = '"';
3020 NEXT;
3021 } else if (NXT(0) == '\'') {
3022 limit = '\'';
3023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3024 NEXT;
3025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029
3030 /*
3031 * allocate a translation buffer.
3032 */
3033 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003034 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003036
3037 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003038 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003039 */
3040 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003041 while ((NXT(0) != limit) && /* checked */
3042 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003044 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003045 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (NXT(1) == '#') {
3047 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003050 if (ctxt->replaceEntities) {
3051 if (len > buf_size - 10) {
3052 growBuffer(buf);
3053 }
3054 buf[len++] = '&';
3055 } else {
3056 /*
3057 * The reparsing will be done in xmlStringGetNodeList()
3058 * called by the attribute() function in SAX.c
3059 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003060 if (len > buf_size - 10) {
3061 growBuffer(buf);
3062 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003063 buf[len++] = '&';
3064 buf[len++] = '#';
3065 buf[len++] = '3';
3066 buf[len++] = '8';
3067 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003070 if (len > buf_size - 10) {
3071 growBuffer(buf);
3072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len += xmlCopyChar(0, &buf[len], val);
3074 }
3075 } else {
3076 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003077 if ((ent != NULL) &&
3078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3079 if (len > buf_size - 10) {
3080 growBuffer(buf);
3081 }
3082 if ((ctxt->replaceEntities == 0) &&
3083 (ent->content[0] == '&')) {
3084 buf[len++] = '&';
3085 buf[len++] = '#';
3086 buf[len++] = '3';
3087 buf[len++] = '8';
3088 buf[len++] = ';';
3089 } else {
3090 buf[len++] = ent->content[0];
3091 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 } else if ((ent != NULL) &&
3093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 xmlChar *rep;
3095
3096 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3097 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 XML_SUBSTITUTE_REF,
3099 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (rep != NULL) {
3101 current = rep;
3102 while (*current != 0) { /* non input consuming */
3103 buf[len++] = *current++;
3104 if (len > buf_size - 10) {
3105 growBuffer(buf);
3106 }
3107 }
3108 xmlFree(rep);
3109 }
3110 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (ent->content != NULL)
3115 buf[len++] = ent->content[0];
3116 }
3117 } else if (ent != NULL) {
3118 int i = xmlStrlen(ent->name);
3119 const xmlChar *cur = ent->name;
3120
3121 /*
3122 * This may look absurd but is needed to detect
3123 * entities problems
3124 */
3125 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3126 (ent->content != NULL)) {
3127 xmlChar *rep;
3128 rep = xmlStringDecodeEntities(ctxt, ent->content,
3129 XML_SUBSTITUTE_REF, 0, 0, 0);
3130 if (rep != NULL)
3131 xmlFree(rep);
3132 }
3133
3134 /*
3135 * Just output the reference
3136 */
3137 buf[len++] = '&';
3138 if (len > buf_size - i - 10) {
3139 growBuffer(buf);
3140 }
3141 for (;i > 0;i--)
3142 buf[len++] = *cur++;
3143 buf[len++] = ';';
3144 }
3145 }
3146 } else {
3147 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003148 if ((len != 0) || (!normalize)) {
3149 if ((!normalize) || (!in_space)) {
3150 COPY_BUF(l,buf,len,0x20);
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 }
3155 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 }
3157 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003158 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003159 COPY_BUF(l,buf,len,c);
3160 if (len > buf_size - 10) {
3161 growBuffer(buf);
3162 }
3163 }
3164 NEXTL(l);
3165 }
3166 GROW;
3167 c = CUR_CHAR(l);
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 if ((in_space) && (normalize)) {
3170 while (buf[len - 1] == 0x20) len--;
3171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003172 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003174 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003175 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003176 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3177 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003178 } else
3179 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003180 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003182
3183mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003186}
3187
3188/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003189 * xmlParseAttValue:
3190 * @ctxt: an XML parser context
3191 *
3192 * parse a value for an attribute
3193 * Note: the parser won't do substitution of entities here, this
3194 * will be handled later in xmlStringGetNodeList
3195 *
3196 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3197 * "'" ([^<&'] | Reference)* "'"
3198 *
3199 * 3.3.3 Attribute-Value Normalization:
3200 * Before the value of an attribute is passed to the application or
3201 * checked for validity, the XML processor must normalize it as follows:
3202 * - a character reference is processed by appending the referenced
3203 * character to the attribute value
3204 * - an entity reference is processed by recursively processing the
3205 * replacement text of the entity
3206 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3207 * appending #x20 to the normalized value, except that only a single
3208 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3209 * parsed entity or the literal entity value of an internal parsed entity
3210 * - other characters are processed by appending them to the normalized value
3211 * If the declared value is not CDATA, then the XML processor must further
3212 * process the normalized attribute value by discarding any leading and
3213 * trailing space (#x20) characters, and by replacing sequences of space
3214 * (#x20) characters by a single space (#x20) character.
3215 * All attributes for which no declaration has been read should be treated
3216 * by a non-validating parser as if declared CDATA.
3217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
3220
3221
3222xmlChar *
3223xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003224 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003226}
3227
3228/**
Owen Taylor3473f882001-02-23 17:55:21 +00003229 * xmlParseSystemLiteral:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse an XML Literal
3233 *
3234 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3235 *
3236 * Returns the SystemLiteral parsed or NULL
3237 */
3238
3239xmlChar *
3240xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3241 xmlChar *buf = NULL;
3242 int len = 0;
3243 int size = XML_PARSER_BUFFER_SIZE;
3244 int cur, l;
3245 xmlChar stop;
3246 int state = ctxt->instate;
3247 int count = 0;
3248
3249 SHRINK;
3250 if (RAW == '"') {
3251 NEXT;
3252 stop = '"';
3253 } else if (RAW == '\'') {
3254 NEXT;
3255 stop = '\'';
3256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 return(NULL);
3265 }
3266 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3267 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003268 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003269 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003270 xmlChar *tmp;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003273 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3274 if (tmp == NULL) {
3275 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003276 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003277 ctxt->instate = (xmlParserInputState) state;
3278 return(NULL);
3279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 count++;
3283 if (count > 50) {
3284 GROW;
3285 count = 0;
3286 }
3287 COPY_BUF(l,buf,len,cur);
3288 NEXTL(l);
3289 cur = CUR_CHAR(l);
3290 if (cur == 0) {
3291 GROW;
3292 SHRINK;
3293 cur = CUR_CHAR(l);
3294 }
3295 }
3296 buf[len] = 0;
3297 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003298 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 } else {
3301 NEXT;
3302 }
3303 return(buf);
3304}
3305
3306/**
3307 * xmlParsePubidLiteral:
3308 * @ctxt: an XML parser context
3309 *
3310 * parse an XML public literal
3311 *
3312 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3313 *
3314 * Returns the PubidLiteral parsed or NULL.
3315 */
3316
3317xmlChar *
3318xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3319 xmlChar *buf = NULL;
3320 int len = 0;
3321 int size = XML_PARSER_BUFFER_SIZE;
3322 xmlChar cur;
3323 xmlChar stop;
3324 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003325 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003326
3327 SHRINK;
3328 if (RAW == '"') {
3329 NEXT;
3330 stop = '"';
3331 } else if (RAW == '\'') {
3332 NEXT;
3333 stop = '\'';
3334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 return(NULL);
3337 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003338 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 return(NULL);
3342 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003343 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003345 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlChar *tmp;
3348
Owen Taylor3473f882001-02-23 17:55:21 +00003349 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003352 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return(NULL);
3355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 buf[len++] = cur;
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 NEXT;
3365 cur = CUR;
3366 if (cur == 0) {
3367 GROW;
3368 SHRINK;
3369 cur = CUR;
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 } else {
3376 NEXT;
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(buf);
3380}
3381
/*
 * Forward declaration: xmlParseCharData() hands over to this routine,
 * whose definition only comes later in the file.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003383
/*
 * Byte classification table for the inner scanning loop of the character
 * data parser: the loop keeps advancing for as long as the entry indexed
 * by the current byte is non-zero.
 *
 * Non-zero entries (each one simply holds its own index):
 *   - 0x09 (tab)
 *   - 0x20 .. 0x7F, except '&' (0x26), '<' (0x3C) and ']' (0x5D)
 * Zero entries, i.e. bytes that stop the loop:
 *   - every other control byte, including 0x0A (LF) and 0x0D (CR)
 *   - '&', '<' and ']'
 *   - every byte from 0x80 up (non-ASCII)
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only the tab is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes are all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3421
Owen Taylor3473f882001-02-23 17:55:21 +00003422/**
3423 * xmlParseCharData:
3424 * @ctxt: an XML parser context
3425 * @cdata: int indicating whether we are within a CDATA section
3426 *
3427 * parse a CharData section.
3428 * if we are within a CDATA section ']]>' marks an end of section.
3429 *
3430 * The right angle bracket (>) may be represented using the string "&gt;",
3431 * and must, for compatibility, be escaped using "&gt;" or a character
3432 * reference when it appears in the string "]]>" in content, when that
3433 * string is not marking the end of a CDATA section.
3434 *
3435 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3436 */
3437
3438void
3439xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003440 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003442 int line = ctxt->input->line;
3443 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003444 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445
3446 SHRINK;
3447 GROW;
3448 /*
3449 * Accelerated common case where input don't need to be
3450 * modified before passing it to the handler.
3451 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003452 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in = ctxt->input->cur;
3454 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003455get_more_space:
3456 while (*in == 0x20) in++;
3457 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003458 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003459 in++;
3460 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003461 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003462 in++;
3463 }
3464 goto get_more_space;
3465 }
3466 if (*in == '<') {
3467 nbchar = in - ctxt->input->cur;
3468 if (nbchar > 0) {
3469 const xmlChar *tmp = ctxt->input->cur;
3470 ctxt->input->cur = in;
3471
Daniel Veillard34099b42004-11-04 17:34:35 +00003472 if ((ctxt->sax != NULL) &&
3473 (ctxt->sax->ignorableWhitespace !=
3474 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003475 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003476 if (ctxt->sax->ignorableWhitespace != NULL)
3477 ctxt->sax->ignorableWhitespace(ctxt->userData,
3478 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003479 } else if (ctxt->sax->characters != NULL)
3480 ctxt->sax->characters(ctxt->userData,
3481 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003482 } else if ((ctxt->sax != NULL) &&
3483 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003484 ctxt->sax->characters(ctxt->userData,
3485 tmp, nbchar);
3486 }
3487 }
3488 return;
3489 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003490
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003491get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003492 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003493 while (test_char_data[*in]) {
3494 in++;
3495 ccol++;
3496 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003497 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003498 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003499 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003500 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003501 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003502 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003503 in++;
3504 }
3505 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003506 }
3507 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003508 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003509 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003510 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003511 return;
3512 }
3513 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003514 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003515 goto get_more;
3516 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003517 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003518 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003521 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003522 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003523 const xmlChar *tmp = ctxt->input->cur;
3524 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003525
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003526 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003527 if (ctxt->sax->ignorableWhitespace != NULL)
3528 ctxt->sax->ignorableWhitespace(ctxt->userData,
3529 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003530 } else if (ctxt->sax->characters != NULL)
3531 ctxt->sax->characters(ctxt->userData,
3532 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003533 line = ctxt->input->line;
3534 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003535 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003536 if (ctxt->sax->characters != NULL)
3537 ctxt->sax->characters(ctxt->userData,
3538 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003539 line = ctxt->input->line;
3540 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003541 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003542 }
3543 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003544 if (*in == 0xD) {
3545 in++;
3546 if (*in == 0xA) {
3547 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003548 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003549 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003550 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003551 }
William M. Brackf4caa5e2005-10-20 09:04:05 +00003552 if (!*in) /* if end of current chunk return */
3553 return;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003554 in--;
3555 }
3556 if (*in == '<') {
3557 return;
3558 }
3559 if (*in == '&') {
3560 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003561 }
3562 SHRINK;
3563 GROW;
3564 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003565 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003566 nbchar = 0;
3567 }
Daniel Veillard50582112001-03-26 22:52:16 +00003568 ctxt->input->line = line;
3569 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003570 xmlParseCharDataComplex(ctxt, cdata);
3571}
3572
Daniel Veillard01c13b52002-12-10 15:19:08 +00003573/**
3574 * xmlParseCharDataComplex:
3575 * @ctxt: an XML parser context
3576 * @cdata: int indicating whether we are within a CDATA section
3577 *
3578 * parse a CharData section.this is the fallback function
3579 * of xmlParseCharData() when the parsing requires handling
3580 * of non-ASCII characters.
3581 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003582void
3583xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003584 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3585 int nbchar = 0;
3586 int cur, l;
3587 int count = 0;
3588
3589 SHRINK;
3590 GROW;
3591 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003592 while ((cur != '<') && /* checked */
3593 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003594 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003595 if ((cur == ']') && (NXT(1) == ']') &&
3596 (NXT(2) == '>')) {
3597 if (cdata) break;
3598 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003599 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003600 }
3601 }
3602 COPY_BUF(l,buf,nbchar,cur);
3603 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003604 buf[nbchar] = 0;
3605
Owen Taylor3473f882001-02-23 17:55:21 +00003606 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003607 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003608 */
3609 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003610 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003611 if (ctxt->sax->ignorableWhitespace != NULL)
3612 ctxt->sax->ignorableWhitespace(ctxt->userData,
3613 buf, nbchar);
3614 } else {
3615 if (ctxt->sax->characters != NULL)
3616 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3617 }
3618 }
3619 nbchar = 0;
3620 }
3621 count++;
3622 if (count > 50) {
3623 GROW;
3624 count = 0;
3625 }
3626 NEXTL(l);
3627 cur = CUR_CHAR(l);
3628 }
3629 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003630 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003631 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003632 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003633 */
3634 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003635 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (ctxt->sax->ignorableWhitespace != NULL)
3637 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3638 } else {
3639 if (ctxt->sax->characters != NULL)
3640 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3641 }
3642 }
3643 }
3644}
3645
3646/**
3647 * xmlParseExternalID:
3648 * @ctxt: an XML parser context
3649 * @publicID: a xmlChar** receiving PubidLiteral
3650 * @strict: indicate whether we should restrict parsing to only
3651 * production [75], see NOTE below
3652 *
3653 * Parse an External ID or a Public ID
3654 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003655 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003656 * 'PUBLIC' S PubidLiteral S SystemLiteral
3657 *
3658 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3659 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3660 *
3661 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3662 *
3663 * Returns the function returns SystemLiteral and in the second
3664 * case publicID receives PubidLiteral, is strict is off
3665 * it is possible to return NULL and have publicID set.
3666 */
3667
3668xmlChar *
3669xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3670 xmlChar *URI = NULL;
3671
3672 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003673
3674 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003675 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003676 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003677 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003678 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3679 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003680 }
3681 SKIP_BLANKS;
3682 URI = xmlParseSystemLiteral(ctxt);
3683 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003684 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003686 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003687 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003688 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003689 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003690 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003691 }
3692 SKIP_BLANKS;
3693 *publicID = xmlParsePubidLiteral(ctxt);
3694 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003695 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003696 }
3697 if (strict) {
3698 /*
3699 * We don't handle [83] so "S SystemLiteral" is required.
3700 */
William M. Brack76e95df2003-10-18 16:20:14 +00003701 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003703 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003704 }
3705 } else {
3706 /*
3707 * We handle [83] so we return immediately, if
3708 * "S SystemLiteral" is not detected. From a purely parsing
3709 * point of view that's a nice mess.
3710 */
3711 const xmlChar *ptr;
3712 GROW;
3713
3714 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003715 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716
William M. Brack76e95df2003-10-18 16:20:14 +00003717 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003718 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3719 }
3720 SKIP_BLANKS;
3721 URI = xmlParseSystemLiteral(ctxt);
3722 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003723 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003724 }
3725 }
3726 return(URI);
3727}
3728
3729/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003730 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003731 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003732 * @buf: the already parsed part of the buffer
3733 * @len: number of bytes filles in the buffer
3734 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003735 *
3736 * Skip an XML (SGML) comment <!-- .... -->
3737 * The spec says that "For compatibility, the string "--" (double-hyphen)
3738 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003739 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003740 *
3741 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3742 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003743static void
3744xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003745 int q, ql;
3746 int r, rl;
3747 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003748 xmlParserInputPtr input = ctxt->input;
3749 int count = 0;
3750
Owen Taylor3473f882001-02-23 17:55:21 +00003751 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003752 len = 0;
3753 size = XML_PARSER_BUFFER_SIZE;
3754 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3755 if (buf == NULL) {
3756 xmlErrMemory(ctxt, NULL);
3757 return;
3758 }
Owen Taylor3473f882001-02-23 17:55:21 +00003759 }
3760 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003761 if (q == 0)
3762 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 NEXTL(ql);
3764 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003765 if (r == 0)
3766 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003767 NEXTL(rl);
3768 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003769 if (cur == 0)
3770 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003771 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003772 ((cur != '>') ||
3773 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003774 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003775 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003776 }
3777 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003778 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003779 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003780 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3781 if (new_buf == NULL) {
3782 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003783 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 return;
3785 }
William M. Bracka3215c72004-07-31 16:24:01 +00003786 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003787 }
3788 COPY_BUF(ql,buf,len,q);
3789 q = r;
3790 ql = rl;
3791 r = cur;
3792 rl = l;
3793
3794 count++;
3795 if (count > 50) {
3796 GROW;
3797 count = 0;
3798 }
3799 NEXTL(l);
3800 cur = CUR_CHAR(l);
3801 if (cur == 0) {
3802 SHRINK;
3803 GROW;
3804 cur = CUR_CHAR(l);
3805 }
3806 }
3807 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003808 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003809 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003810 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003811 xmlFree(buf);
3812 } else {
3813 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003814 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3815 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003816 }
3817 NEXT;
3818 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3819 (!ctxt->disableSAX))
3820 ctxt->sax->comment(ctxt->userData, buf);
3821 xmlFree(buf);
3822 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003823 return;
3824not_terminated:
3825 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3826 "Comment not terminated\n", NULL);
3827 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003828}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003829/**
3830 * xmlParseComment:
3831 * @ctxt: an XML parser context
3832 *
3833 * Skip an XML (SGML) comment <!-- .... -->
3834 * The spec says that "For compatibility, the string "--" (double-hyphen)
3835 * must not occur within comments. "
3836 *
3837 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3838 */
3839void
3840xmlParseComment(xmlParserCtxtPtr ctxt) {
3841 xmlChar *buf = NULL;
3842 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003843 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003844 xmlParserInputState state;
3845 const xmlChar *in;
3846 int nbchar = 0, ccol;
3847
3848 /*
3849 * Check that there is a comment right here.
3850 */
3851 if ((RAW != '<') || (NXT(1) != '!') ||
3852 (NXT(2) != '-') || (NXT(3) != '-')) return;
3853
3854 state = ctxt->instate;
3855 ctxt->instate = XML_PARSER_COMMENT;
3856 SKIP(4);
3857 SHRINK;
3858 GROW;
3859
3860 /*
3861 * Accelerated common case where input don't need to be
3862 * modified before passing it to the handler.
3863 */
3864 in = ctxt->input->cur;
3865 do {
3866 if (*in == 0xA) {
3867 ctxt->input->line++; ctxt->input->col = 1;
3868 in++;
3869 while (*in == 0xA) {
3870 ctxt->input->line++; ctxt->input->col = 1;
3871 in++;
3872 }
3873 }
3874get_more:
3875 ccol = ctxt->input->col;
3876 while (((*in > '-') && (*in <= 0x7F)) ||
3877 ((*in >= 0x20) && (*in < '-')) ||
3878 (*in == 0x09)) {
3879 in++;
3880 ccol++;
3881 }
3882 ctxt->input->col = ccol;
3883 if (*in == 0xA) {
3884 ctxt->input->line++; ctxt->input->col = 1;
3885 in++;
3886 while (*in == 0xA) {
3887 ctxt->input->line++; ctxt->input->col = 1;
3888 in++;
3889 }
3890 goto get_more;
3891 }
3892 nbchar = in - ctxt->input->cur;
3893 /*
3894 * save current set of data
3895 */
3896 if (nbchar > 0) {
3897 if ((ctxt->sax != NULL) &&
3898 (ctxt->sax->comment != NULL)) {
3899 if (buf == NULL) {
3900 if ((*in == '-') && (in[1] == '-'))
3901 size = nbchar + 1;
3902 else
3903 size = XML_PARSER_BUFFER_SIZE + nbchar;
3904 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3905 if (buf == NULL) {
3906 xmlErrMemory(ctxt, NULL);
3907 ctxt->instate = state;
3908 return;
3909 }
3910 len = 0;
3911 } else if (len + nbchar + 1 >= size) {
3912 xmlChar *new_buf;
3913 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3914 new_buf = (xmlChar *) xmlRealloc(buf,
3915 size * sizeof(xmlChar));
3916 if (new_buf == NULL) {
3917 xmlFree (buf);
3918 xmlErrMemory(ctxt, NULL);
3919 ctxt->instate = state;
3920 return;
3921 }
3922 buf = new_buf;
3923 }
3924 memcpy(&buf[len], ctxt->input->cur, nbchar);
3925 len += nbchar;
3926 buf[len] = 0;
3927 }
3928 }
3929 ctxt->input->cur = in;
3930 if (*in == 0xA)
3931
3932 if (*in == 0xD) {
3933 in++;
3934 if (*in == 0xA) {
3935 ctxt->input->cur = in;
3936 in++;
3937 ctxt->input->line++; ctxt->input->col = 1;
3938 continue; /* while */
3939 }
3940 in--;
3941 }
3942 SHRINK;
3943 GROW;
3944 in = ctxt->input->cur;
3945 if (*in == '-') {
3946 if (in[1] == '-') {
3947 if (in[2] == '>') {
3948 SKIP(3);
3949 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3950 (!ctxt->disableSAX)) {
3951 if (buf != NULL)
3952 ctxt->sax->comment(ctxt->userData, buf);
3953 else
3954 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3955 }
3956 if (buf != NULL)
3957 xmlFree(buf);
3958 ctxt->instate = state;
3959 return;
3960 }
3961 if (buf != NULL)
3962 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3963 "Comment not terminated \n<!--%.50s\n",
3964 buf);
3965 else
3966 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3967 "Comment not terminated \n", NULL);
3968 in++;
3969 ctxt->input->col++;
3970 }
3971 in++;
3972 ctxt->input->col++;
3973 goto get_more;
3974 }
3975 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3976 xmlParseCommentComplex(ctxt, buf, len, size);
3977 ctxt->instate = state;
3978 return;
3979}
3980
Owen Taylor3473f882001-02-23 17:55:21 +00003981
3982/**
3983 * xmlParsePITarget:
3984 * @ctxt: an XML parser context
3985 *
3986 * parse the name of a PI
3987 *
3988 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3989 *
3990 * Returns the PITarget name or NULL
3991 */
3992
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003993const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003994xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003995 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003996
3997 name = xmlParseName(ctxt);
3998 if ((name != NULL) &&
3999 ((name[0] == 'x') || (name[0] == 'X')) &&
4000 ((name[1] == 'm') || (name[1] == 'M')) &&
4001 ((name[2] == 'l') || (name[2] == 'L'))) {
4002 int i;
4003 if ((name[0] == 'x') && (name[1] == 'm') &&
4004 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004005 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004006 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004007 return(name);
4008 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004009 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004010 return(name);
4011 }
4012 for (i = 0;;i++) {
4013 if (xmlW3CPIs[i] == NULL) break;
4014 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4015 return(name);
4016 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004017 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4018 "xmlParsePITarget: invalid name prefix 'xml'\n",
4019 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004020 }
4021 return(name);
4022}
4023
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004024#ifdef LIBXML_CATALOG_ENABLED
4025/**
4026 * xmlParseCatalogPI:
4027 * @ctxt: an XML parser context
4028 * @catalog: the PI value string
4029 *
4030 * parse an XML Catalog Processing Instruction.
4031 *
4032 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4033 *
4034 * Occurs only if allowed by the user and if happening in the Misc
4035 * part of the document before any doctype informations
4036 * This will add the given catalog to the parsing context in order
4037 * to be used if there is a resolution need further down in the document
4038 */
4039
4040static void
4041xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4042 xmlChar *URL = NULL;
4043 const xmlChar *tmp, *base;
4044 xmlChar marker;
4045
4046 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004047 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004048 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4049 goto error;
4050 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004051 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004052 if (*tmp != '=') {
4053 return;
4054 }
4055 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004056 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004057 marker = *tmp;
4058 if ((marker != '\'') && (marker != '"'))
4059 goto error;
4060 tmp++;
4061 base = tmp;
4062 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4063 if (*tmp == 0)
4064 goto error;
4065 URL = xmlStrndup(base, tmp - base);
4066 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004067 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004068 if (*tmp != 0)
4069 goto error;
4070
4071 if (URL != NULL) {
4072 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4073 xmlFree(URL);
4074 }
4075 return;
4076
4077error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004078 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4079 "Catalog PI syntax error: %s\n",
4080 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004081 if (URL != NULL)
4082 xmlFree(URL);
4083}
4084#endif
4085
Owen Taylor3473f882001-02-23 17:55:21 +00004086/**
4087 * xmlParsePI:
4088 * @ctxt: an XML parser context
4089 *
4090 * parse an XML Processing Instruction.
4091 *
4092 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4093 *
4094 * The processing is transfered to SAX once parsed.
4095 */
4096
4097void
4098xmlParsePI(xmlParserCtxtPtr ctxt) {
4099 xmlChar *buf = NULL;
4100 int len = 0;
4101 int size = XML_PARSER_BUFFER_SIZE;
4102 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004103 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004104 xmlParserInputState state;
4105 int count = 0;
4106
4107 if ((RAW == '<') && (NXT(1) == '?')) {
4108 xmlParserInputPtr input = ctxt->input;
4109 state = ctxt->instate;
4110 ctxt->instate = XML_PARSER_PI;
4111 /*
4112 * this is a Processing Instruction.
4113 */
4114 SKIP(2);
4115 SHRINK;
4116
4117 /*
4118 * Parse the target name and check for special support like
4119 * namespace.
4120 */
4121 target = xmlParsePITarget(ctxt);
4122 if (target != NULL) {
4123 if ((RAW == '?') && (NXT(1) == '>')) {
4124 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004125 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4126 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004127 }
4128 SKIP(2);
4129
4130 /*
4131 * SAX: PI detected.
4132 */
4133 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4134 (ctxt->sax->processingInstruction != NULL))
4135 ctxt->sax->processingInstruction(ctxt->userData,
4136 target, NULL);
4137 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004138 return;
4139 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004140 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004141 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004142 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004143 ctxt->instate = state;
4144 return;
4145 }
4146 cur = CUR;
4147 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004148 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4149 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004150 }
4151 SKIP_BLANKS;
4152 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004153 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004154 ((cur != '?') || (NXT(1) != '>'))) {
4155 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004156 xmlChar *tmp;
4157
Owen Taylor3473f882001-02-23 17:55:21 +00004158 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004159 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4160 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004161 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004162 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004163 ctxt->instate = state;
4164 return;
4165 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004166 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004167 }
4168 count++;
4169 if (count > 50) {
4170 GROW;
4171 count = 0;
4172 }
4173 COPY_BUF(l,buf,len,cur);
4174 NEXTL(l);
4175 cur = CUR_CHAR(l);
4176 if (cur == 0) {
4177 SHRINK;
4178 GROW;
4179 cur = CUR_CHAR(l);
4180 }
4181 }
4182 buf[len] = 0;
4183 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004184 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4185 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004186 } else {
4187 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004188 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4189 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004190 }
4191 SKIP(2);
4192
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004193#ifdef LIBXML_CATALOG_ENABLED
4194 if (((state == XML_PARSER_MISC) ||
4195 (state == XML_PARSER_START)) &&
4196 (xmlStrEqual(target, XML_CATALOG_PI))) {
4197 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4198 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4199 (allow == XML_CATA_ALLOW_ALL))
4200 xmlParseCatalogPI(ctxt, buf);
4201 }
4202#endif
4203
4204
Owen Taylor3473f882001-02-23 17:55:21 +00004205 /*
4206 * SAX: PI detected.
4207 */
4208 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4209 (ctxt->sax->processingInstruction != NULL))
4210 ctxt->sax->processingInstruction(ctxt->userData,
4211 target, buf);
4212 }
4213 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004214 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004215 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 ctxt->instate = state;
4218 }
4219}
4220
4221/**
4222 * xmlParseNotationDecl:
4223 * @ctxt: an XML parser context
4224 *
4225 * parse a notation declaration
4226 *
4227 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4228 *
4229 * Hence there is actually 3 choices:
4230 * 'PUBLIC' S PubidLiteral
4231 * 'PUBLIC' S PubidLiteral S SystemLiteral
4232 * and 'SYSTEM' S SystemLiteral
4233 *
4234 * See the NOTE on xmlParseExternalID().
4235 */
4236
4237void
4238xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004239 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004240 xmlChar *Pubid;
4241 xmlChar *Systemid;
4242
Daniel Veillarda07050d2003-10-19 14:46:32 +00004243 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004244 xmlParserInputPtr input = ctxt->input;
4245 SHRINK;
4246 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004247 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004248 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4249 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004250 return;
4251 }
4252 SKIP_BLANKS;
4253
Daniel Veillard76d66f42001-05-16 21:05:17 +00004254 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004255 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004256 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004257 return;
4258 }
William M. Brack76e95df2003-10-18 16:20:14 +00004259 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004260 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004261 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004262 return;
4263 }
4264 SKIP_BLANKS;
4265
4266 /*
4267 * Parse the IDs.
4268 */
4269 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4270 SKIP_BLANKS;
4271
4272 if (RAW == '>') {
4273 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004274 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4275 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004276 }
4277 NEXT;
4278 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4279 (ctxt->sax->notationDecl != NULL))
4280 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4281 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004282 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004283 }
Owen Taylor3473f882001-02-23 17:55:21 +00004284 if (Systemid != NULL) xmlFree(Systemid);
4285 if (Pubid != NULL) xmlFree(Pubid);
4286 }
4287}
4288
4289/**
4290 * xmlParseEntityDecl:
4291 * @ctxt: an XML parser context
4292 *
4293 * parse <!ENTITY declarations
4294 *
4295 * [70] EntityDecl ::= GEDecl | PEDecl
4296 *
4297 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4298 *
4299 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4300 *
4301 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4302 *
4303 * [74] PEDef ::= EntityValue | ExternalID
4304 *
4305 * [76] NDataDecl ::= S 'NDATA' S Name
4306 *
4307 * [ VC: Notation Declared ]
4308 * The Name must match the declared name of a notation.
4309 */
4310
4311void
4312xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004313 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004314 xmlChar *value = NULL;
4315 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004316 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004317 int isParameter = 0;
4318 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004319 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004320
Daniel Veillard4c778d82005-01-23 17:37:44 +00004321 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004322 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004323 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004324 SHRINK;
4325 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004326 skipped = SKIP_BLANKS;
4327 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004328 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4329 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
Owen Taylor3473f882001-02-23 17:55:21 +00004331
4332 if (RAW == '%') {
4333 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004334 skipped = SKIP_BLANKS;
4335 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4337 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
Owen Taylor3473f882001-02-23 17:55:21 +00004339 isParameter = 1;
4340 }
4341
Daniel Veillard76d66f42001-05-16 21:05:17 +00004342 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004343 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004344 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4345 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004346 return;
4347 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004348 skipped = SKIP_BLANKS;
4349 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004350 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4351 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004352 }
Owen Taylor3473f882001-02-23 17:55:21 +00004353
Daniel Veillardf5582f12002-06-11 10:08:16 +00004354 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004355 /*
4356 * handle the various case of definitions...
4357 */
4358 if (isParameter) {
4359 if ((RAW == '"') || (RAW == '\'')) {
4360 value = xmlParseEntityValue(ctxt, &orig);
4361 if (value) {
4362 if ((ctxt->sax != NULL) &&
4363 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4364 ctxt->sax->entityDecl(ctxt->userData, name,
4365 XML_INTERNAL_PARAMETER_ENTITY,
4366 NULL, NULL, value);
4367 }
4368 } else {
4369 URI = xmlParseExternalID(ctxt, &literal, 1);
4370 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004371 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004372 }
4373 if (URI) {
4374 xmlURIPtr uri;
4375
4376 uri = xmlParseURI((const char *) URI);
4377 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004378 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4379 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004380 /*
4381 * This really ought to be a well formedness error
4382 * but the XML Core WG decided otherwise c.f. issue
4383 * E26 of the XML erratas.
4384 */
Owen Taylor3473f882001-02-23 17:55:21 +00004385 } else {
4386 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004387 /*
4388 * Okay this is foolish to block those but not
4389 * invalid URIs.
4390 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004391 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004392 } else {
4393 if ((ctxt->sax != NULL) &&
4394 (!ctxt->disableSAX) &&
4395 (ctxt->sax->entityDecl != NULL))
4396 ctxt->sax->entityDecl(ctxt->userData, name,
4397 XML_EXTERNAL_PARAMETER_ENTITY,
4398 literal, URI, NULL);
4399 }
4400 xmlFreeURI(uri);
4401 }
4402 }
4403 }
4404 } else {
4405 if ((RAW == '"') || (RAW == '\'')) {
4406 value = xmlParseEntityValue(ctxt, &orig);
4407 if ((ctxt->sax != NULL) &&
4408 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4409 ctxt->sax->entityDecl(ctxt->userData, name,
4410 XML_INTERNAL_GENERAL_ENTITY,
4411 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004412 /*
4413 * For expat compatibility in SAX mode.
4414 */
4415 if ((ctxt->myDoc == NULL) ||
4416 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4417 if (ctxt->myDoc == NULL) {
4418 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4419 }
4420 if (ctxt->myDoc->intSubset == NULL)
4421 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4422 BAD_CAST "fake", NULL, NULL);
4423
Daniel Veillard1af9a412003-08-20 22:54:39 +00004424 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4425 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004426 }
Owen Taylor3473f882001-02-23 17:55:21 +00004427 } else {
4428 URI = xmlParseExternalID(ctxt, &literal, 1);
4429 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 }
4432 if (URI) {
4433 xmlURIPtr uri;
4434
4435 uri = xmlParseURI((const char *)URI);
4436 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004437 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4438 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004439 /*
4440 * This really ought to be a well formedness error
4441 * but the XML Core WG decided otherwise c.f. issue
4442 * E26 of the XML erratas.
4443 */
Owen Taylor3473f882001-02-23 17:55:21 +00004444 } else {
4445 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004446 /*
4447 * Okay this is foolish to block those but not
4448 * invalid URIs.
4449 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004450 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004451 }
4452 xmlFreeURI(uri);
4453 }
4454 }
William M. Brack76e95df2003-10-18 16:20:14 +00004455 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004456 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4457 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004458 }
4459 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004460 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004461 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004462 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004463 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4464 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004465 }
4466 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004467 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004468 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4469 (ctxt->sax->unparsedEntityDecl != NULL))
4470 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4471 literal, URI, ndata);
4472 } else {
4473 if ((ctxt->sax != NULL) &&
4474 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4475 ctxt->sax->entityDecl(ctxt->userData, name,
4476 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4477 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004478 /*
4479 * For expat compatibility in SAX mode.
4480 * assuming the entity repalcement was asked for
4481 */
4482 if ((ctxt->replaceEntities != 0) &&
4483 ((ctxt->myDoc == NULL) ||
4484 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4485 if (ctxt->myDoc == NULL) {
4486 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4487 }
4488
4489 if (ctxt->myDoc->intSubset == NULL)
4490 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4491 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004492 xmlSAX2EntityDecl(ctxt, name,
4493 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4494 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004495 }
Owen Taylor3473f882001-02-23 17:55:21 +00004496 }
4497 }
4498 }
4499 SKIP_BLANKS;
4500 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004501 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004502 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 } else {
4504 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004505 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4506 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004507 }
4508 NEXT;
4509 }
4510 if (orig != NULL) {
4511 /*
4512 * Ugly mechanism to save the raw entity value.
4513 */
4514 xmlEntityPtr cur = NULL;
4515
4516 if (isParameter) {
4517 if ((ctxt->sax != NULL) &&
4518 (ctxt->sax->getParameterEntity != NULL))
4519 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4520 } else {
4521 if ((ctxt->sax != NULL) &&
4522 (ctxt->sax->getEntity != NULL))
4523 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004524 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004525 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004526 }
Owen Taylor3473f882001-02-23 17:55:21 +00004527 }
4528 if (cur != NULL) {
4529 if (cur->orig != NULL)
4530 xmlFree(orig);
4531 else
4532 cur->orig = orig;
4533 } else
4534 xmlFree(orig);
4535 }
Owen Taylor3473f882001-02-23 17:55:21 +00004536 if (value != NULL) xmlFree(value);
4537 if (URI != NULL) xmlFree(URI);
4538 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004539 }
4540}
4541
4542/**
4543 * xmlParseDefaultDecl:
4544 * @ctxt: an XML parser context
4545 * @value: Receive a possible fixed default value for the attribute
4546 *
4547 * Parse an attribute default declaration
4548 *
4549 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4550 *
4551 * [ VC: Required Attribute ]
4552 * if the default declaration is the keyword #REQUIRED, then the
4553 * attribute must be specified for all elements of the type in the
4554 * attribute-list declaration.
4555 *
4556 * [ VC: Attribute Default Legal ]
4557 * The declared default value must meet the lexical constraints of
4558 * the declared attribute type c.f. xmlValidateAttributeDecl()
4559 *
4560 * [ VC: Fixed Attribute Default ]
4561 * if an attribute has a default value declared with the #FIXED
4562 * keyword, instances of that attribute must match the default value.
4563 *
4564 * [ WFC: No < in Attribute Values ]
4565 * handled in xmlParseAttValue()
4566 *
4567 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4568 * or XML_ATTRIBUTE_FIXED.
4569 */
4570
4571int
4572xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4573 int val;
4574 xmlChar *ret;
4575
4576 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004577 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004578 SKIP(9);
4579 return(XML_ATTRIBUTE_REQUIRED);
4580 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004581 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004582 SKIP(8);
4583 return(XML_ATTRIBUTE_IMPLIED);
4584 }
4585 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004586 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004587 SKIP(6);
4588 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004589 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004590 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4591 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004592 }
4593 SKIP_BLANKS;
4594 }
4595 ret = xmlParseAttValue(ctxt);
4596 ctxt->instate = XML_PARSER_DTD;
4597 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004598 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004599 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004600 } else
4601 *value = ret;
4602 return(val);
4603}
4604
4605/**
4606 * xmlParseNotationType:
4607 * @ctxt: an XML parser context
4608 *
4609 * parse an Notation attribute type.
4610 *
4611 * Note: the leading 'NOTATION' S part has already being parsed...
4612 *
4613 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4614 *
4615 * [ VC: Notation Attributes ]
4616 * Values of this type must match one of the notation names included
4617 * in the declaration; all notation names in the declaration must be declared.
4618 *
4619 * Returns: the notation attribute tree built while parsing
4620 */
4621
4622xmlEnumerationPtr
4623xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004624 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004625 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4626
4627 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004628 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004629 return(NULL);
4630 }
4631 SHRINK;
4632 do {
4633 NEXT;
4634 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004635 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4638 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004639 return(ret);
4640 }
4641 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004642 if (cur == NULL) return(ret);
4643 if (last == NULL) ret = last = cur;
4644 else {
4645 last->next = cur;
4646 last = cur;
4647 }
4648 SKIP_BLANKS;
4649 } while (RAW == '|');
4650 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004651 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004652 if ((last != NULL) && (last != ret))
4653 xmlFreeEnumeration(last);
4654 return(ret);
4655 }
4656 NEXT;
4657 return(ret);
4658}
4659
4660/**
4661 * xmlParseEnumerationType:
4662 * @ctxt: an XML parser context
4663 *
4664 * parse an Enumeration attribute type.
4665 *
4666 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4667 *
4668 * [ VC: Enumeration ]
4669 * Values of this type must match one of the Nmtoken tokens in
4670 * the declaration
4671 *
4672 * Returns: the enumeration attribute tree built while parsing
4673 */
4674
4675xmlEnumerationPtr
4676xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4677 xmlChar *name;
4678 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4679
4680 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004681 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004682 return(NULL);
4683 }
4684 SHRINK;
4685 do {
4686 NEXT;
4687 SKIP_BLANKS;
4688 name = xmlParseNmtoken(ctxt);
4689 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004690 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 return(ret);
4692 }
4693 cur = xmlCreateEnumeration(name);
4694 xmlFree(name);
4695 if (cur == NULL) return(ret);
4696 if (last == NULL) ret = last = cur;
4697 else {
4698 last->next = cur;
4699 last = cur;
4700 }
4701 SKIP_BLANKS;
4702 } while (RAW == '|');
4703 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004704 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004705 return(ret);
4706 }
4707 NEXT;
4708 return(ret);
4709}
4710
4711/**
4712 * xmlParseEnumeratedType:
4713 * @ctxt: an XML parser context
4714 * @tree: the enumeration tree built while parsing
4715 *
4716 * parse an Enumerated attribute type.
4717 *
4718 * [57] EnumeratedType ::= NotationType | Enumeration
4719 *
4720 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4721 *
4722 *
4723 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4724 */
4725
4726int
4727xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004728 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004729 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004730 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004731 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4732 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004733 return(0);
4734 }
4735 SKIP_BLANKS;
4736 *tree = xmlParseNotationType(ctxt);
4737 if (*tree == NULL) return(0);
4738 return(XML_ATTRIBUTE_NOTATION);
4739 }
4740 *tree = xmlParseEnumerationType(ctxt);
4741 if (*tree == NULL) return(0);
4742 return(XML_ATTRIBUTE_ENUMERATION);
4743}
4744
4745/**
4746 * xmlParseAttributeType:
4747 * @ctxt: an XML parser context
4748 * @tree: the enumeration tree built while parsing
4749 *
4750 * parse the Attribute list def for an element
4751 *
4752 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4753 *
4754 * [55] StringType ::= 'CDATA'
4755 *
4756 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4757 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4758 *
4759 * Validity constraints for attribute values syntax are checked in
4760 * xmlValidateAttributeValue()
4761 *
4762 * [ VC: ID ]
4763 * Values of type ID must match the Name production. A name must not
4764 * appear more than once in an XML document as a value of this type;
4765 * i.e., ID values must uniquely identify the elements which bear them.
4766 *
4767 * [ VC: One ID per Element Type ]
4768 * No element type may have more than one ID attribute specified.
4769 *
4770 * [ VC: ID Attribute Default ]
4771 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4772 *
4773 * [ VC: IDREF ]
4774 * Values of type IDREF must match the Name production, and values
4775 * of type IDREFS must match Names; each IDREF Name must match the value
4776 * of an ID attribute on some element in the XML document; i.e. IDREF
4777 * values must match the value of some ID attribute.
4778 *
4779 * [ VC: Entity Name ]
4780 * Values of type ENTITY must match the Name production, values
4781 * of type ENTITIES must match Names; each Entity Name must match the
4782 * name of an unparsed entity declared in the DTD.
4783 *
4784 * [ VC: Name Token ]
4785 * Values of type NMTOKEN must match the Nmtoken production; values
4786 * of type NMTOKENS must match Nmtokens.
4787 *
4788 * Returns the attribute type
4789 */
4790int
4791xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4792 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004793 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004794 SKIP(5);
4795 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004796 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004797 SKIP(6);
4798 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004799 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004800 SKIP(5);
4801 return(XML_ATTRIBUTE_IDREF);
4802 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4803 SKIP(2);
4804 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004805 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004806 SKIP(6);
4807 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004808 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004809 SKIP(8);
4810 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004811 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004812 SKIP(8);
4813 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004814 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004815 SKIP(7);
4816 return(XML_ATTRIBUTE_NMTOKEN);
4817 }
4818 return(xmlParseEnumeratedType(ctxt, tree));
4819}
4820
4821/**
4822 * xmlParseAttributeListDecl:
4823 * @ctxt: an XML parser context
4824 *
4825 * : parse the Attribute list def for an element
4826 *
4827 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4828 *
4829 * [53] AttDef ::= S Name S AttType S DefaultDecl
4830 *
4831 */
4832void
4833xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004834 const xmlChar *elemName;
4835 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004836 xmlEnumerationPtr tree;
4837
Daniel Veillarda07050d2003-10-19 14:46:32 +00004838 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004839 xmlParserInputPtr input = ctxt->input;
4840
4841 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004842 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004843 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004844 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004845 }
4846 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004847 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004848 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004849 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4850 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004851 return;
4852 }
4853 SKIP_BLANKS;
4854 GROW;
4855 while (RAW != '>') {
4856 const xmlChar *check = CUR_PTR;
4857 int type;
4858 int def;
4859 xmlChar *defaultValue = NULL;
4860
4861 GROW;
4862 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004863 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004864 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004865 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4866 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004867 break;
4868 }
4869 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004870 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004871 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004872 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004873 if (defaultValue != NULL)
4874 xmlFree(defaultValue);
4875 break;
4876 }
4877 SKIP_BLANKS;
4878
4879 type = xmlParseAttributeType(ctxt, &tree);
4880 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004881 if (defaultValue != NULL)
4882 xmlFree(defaultValue);
4883 break;
4884 }
4885
4886 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004887 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004888 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4889 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004890 if (defaultValue != NULL)
4891 xmlFree(defaultValue);
4892 if (tree != NULL)
4893 xmlFreeEnumeration(tree);
4894 break;
4895 }
4896 SKIP_BLANKS;
4897
4898 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4899 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004900 if (defaultValue != NULL)
4901 xmlFree(defaultValue);
4902 if (tree != NULL)
4903 xmlFreeEnumeration(tree);
4904 break;
4905 }
4906
4907 GROW;
4908 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004909 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004910 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004911 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004912 if (defaultValue != NULL)
4913 xmlFree(defaultValue);
4914 if (tree != NULL)
4915 xmlFreeEnumeration(tree);
4916 break;
4917 }
4918 SKIP_BLANKS;
4919 }
4920 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004921 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4922 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004923 if (defaultValue != NULL)
4924 xmlFree(defaultValue);
4925 if (tree != NULL)
4926 xmlFreeEnumeration(tree);
4927 break;
4928 }
4929 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4930 (ctxt->sax->attributeDecl != NULL))
4931 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4932 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004933 else if (tree != NULL)
4934 xmlFreeEnumeration(tree);
4935
4936 if ((ctxt->sax2) && (defaultValue != NULL) &&
4937 (def != XML_ATTRIBUTE_IMPLIED) &&
4938 (def != XML_ATTRIBUTE_REQUIRED)) {
4939 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4940 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004941 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4942 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4943 }
Owen Taylor3473f882001-02-23 17:55:21 +00004944 if (defaultValue != NULL)
4945 xmlFree(defaultValue);
4946 GROW;
4947 }
4948 if (RAW == '>') {
4949 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004950 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4951 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004952 }
4953 NEXT;
4954 }
Owen Taylor3473f882001-02-23 17:55:21 +00004955 }
4956}
4957
4958/**
4959 * xmlParseElementMixedContentDecl:
4960 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004961 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004962 *
4963 * parse the declaration for a Mixed Element content
4964 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4965 *
4966 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4967 * '(' S? '#PCDATA' S? ')'
4968 *
4969 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4970 *
4971 * [ VC: No Duplicate Types ]
4972 * The same name must not appear more than once in a single
4973 * mixed-content declaration.
4974 *
4975 * returns: the list of the xmlElementContentPtr describing the element choices
4976 */
4977xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004978xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004979 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004980 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004981
4982 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004983 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004984 SKIP(7);
4985 SKIP_BLANKS;
4986 SHRINK;
4987 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004988 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004989 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4990"Element content declaration doesn't start and stop in the same entity\n",
4991 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004992 }
Owen Taylor3473f882001-02-23 17:55:21 +00004993 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004994 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 if (RAW == '*') {
4996 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4997 NEXT;
4998 }
4999 return(ret);
5000 }
5001 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005002 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005003 if (ret == NULL) return(NULL);
5004 }
5005 while (RAW == '|') {
5006 NEXT;
5007 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005008 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005009 if (ret == NULL) return(NULL);
5010 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005011 if (cur != NULL)
5012 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005013 cur = ret;
5014 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005015 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005016 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005017 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005018 if (n->c1 != NULL)
5019 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005020 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005021 if (n != NULL)
5022 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005023 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005024 }
5025 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005026 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005028 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005029 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005030 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005031 return(NULL);
5032 }
5033 SKIP_BLANKS;
5034 GROW;
5035 }
5036 if ((RAW == ')') && (NXT(1) == '*')) {
5037 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005038 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005039 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005040 if (cur->c2 != NULL)
5041 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005042 }
5043 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005044 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005045 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5046"Element content declaration doesn't start and stop in the same entity\n",
5047 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005048 }
Owen Taylor3473f882001-02-23 17:55:21 +00005049 SKIP(2);
5050 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005051 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005052 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005053 return(NULL);
5054 }
5055
5056 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005057 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005058 }
5059 return(ret);
5060}
5061
5062/**
5063 * xmlParseElementChildrenContentDecl:
5064 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005065 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005066 *
5067 * parse the declaration for a Mixed Element content
5068 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5069 *
5070 *
5071 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5072 *
5073 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5074 *
5075 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5076 *
5077 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5078 *
5079 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5080 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005081 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005082 * opening or closing parentheses in a choice, seq, or Mixed
5083 * construct is contained in the replacement text for a parameter
5084 * entity, both must be contained in the same replacement text. For
5085 * interoperability, if a parameter-entity reference appears in a
5086 * choice, seq, or Mixed construct, its replacement text should not
5087 * be empty, and neither the first nor last non-blank character of
5088 * the replacement text should be a connector (| or ,).
5089 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005090 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005091 * hierarchy.
5092 */
5093xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005094xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005095 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005096 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005097 xmlChar type = 0;
5098
5099 SKIP_BLANKS;
5100 GROW;
5101 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005102 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005103
Owen Taylor3473f882001-02-23 17:55:21 +00005104 /* Recurse on first child */
5105 NEXT;
5106 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005107 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005108 SKIP_BLANKS;
5109 GROW;
5110 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005111 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005112 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005113 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005114 return(NULL);
5115 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005116 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005117 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005118 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005119 return(NULL);
5120 }
Owen Taylor3473f882001-02-23 17:55:21 +00005121 GROW;
5122 if (RAW == '?') {
5123 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5124 NEXT;
5125 } else if (RAW == '*') {
5126 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5127 NEXT;
5128 } else if (RAW == '+') {
5129 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5130 NEXT;
5131 } else {
5132 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5133 }
Owen Taylor3473f882001-02-23 17:55:21 +00005134 GROW;
5135 }
5136 SKIP_BLANKS;
5137 SHRINK;
5138 while (RAW != ')') {
5139 /*
5140 * Each loop we parse one separator and one element.
5141 */
5142 if (RAW == ',') {
5143 if (type == 0) type = CUR;
5144
5145 /*
5146 * Detect "Name | Name , Name" error
5147 */
5148 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005149 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005150 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005151 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005152 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005153 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005154 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005155 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 return(NULL);
5157 }
5158 NEXT;
5159
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005160 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005161 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005162 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005163 xmlFreeDocElementContent(ctxt->myDoc, last);
5164 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005165 return(NULL);
5166 }
5167 if (last == NULL) {
5168 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005169 if (ret != NULL)
5170 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005171 ret = cur = op;
5172 } else {
5173 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005174 if (op != NULL)
5175 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005176 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005177 if (last != NULL)
5178 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005179 cur =op;
5180 last = NULL;
5181 }
5182 } else if (RAW == '|') {
5183 if (type == 0) type = CUR;
5184
5185 /*
5186 * Detect "Name , Name | Name" error
5187 */
5188 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005189 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005190 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005191 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005192 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005193 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005194 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005195 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 return(NULL);
5197 }
5198 NEXT;
5199
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005200 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005201 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005202 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005203 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005205 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005206 return(NULL);
5207 }
5208 if (last == NULL) {
5209 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005210 if (ret != NULL)
5211 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005212 ret = cur = op;
5213 } else {
5214 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005215 if (op != NULL)
5216 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005217 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005218 if (last != NULL)
5219 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005220 cur =op;
5221 last = NULL;
5222 }
5223 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005224 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005225 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005226 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 return(NULL);
5228 }
5229 GROW;
5230 SKIP_BLANKS;
5231 GROW;
5232 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005233 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005234 /* Recurse on second child */
5235 NEXT;
5236 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005237 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005238 SKIP_BLANKS;
5239 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005240 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005242 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005243 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 return(NULL);
5246 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005247 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005248 if (RAW == '?') {
5249 last->ocur = XML_ELEMENT_CONTENT_OPT;
5250 NEXT;
5251 } else if (RAW == '*') {
5252 last->ocur = XML_ELEMENT_CONTENT_MULT;
5253 NEXT;
5254 } else if (RAW == '+') {
5255 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5256 NEXT;
5257 } else {
5258 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5259 }
5260 }
5261 SKIP_BLANKS;
5262 GROW;
5263 }
5264 if ((cur != NULL) && (last != NULL)) {
5265 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005266 if (last != NULL)
5267 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005269 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005270 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5271"Element content declaration doesn't start and stop in the same entity\n",
5272 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005273 }
Owen Taylor3473f882001-02-23 17:55:21 +00005274 NEXT;
5275 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005276 if (ret != NULL) {
5277 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5278 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5279 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5280 else
5281 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5282 }
Owen Taylor3473f882001-02-23 17:55:21 +00005283 NEXT;
5284 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005285 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005286 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005287 cur = ret;
5288 /*
5289 * Some normalization:
5290 * (a | b* | c?)* == (a | b | c)*
5291 */
5292 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5293 if ((cur->c1 != NULL) &&
5294 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5295 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5296 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5297 if ((cur->c2 != NULL) &&
5298 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5299 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5300 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5301 cur = cur->c2;
5302 }
5303 }
Owen Taylor3473f882001-02-23 17:55:21 +00005304 NEXT;
5305 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005306 if (ret != NULL) {
5307 int found = 0;
5308
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005309 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5310 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5311 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005312 else
5313 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005314 /*
5315 * Some normalization:
5316 * (a | b*)+ == (a | b)*
5317 * (a | b?)+ == (a | b)*
5318 */
5319 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5320 if ((cur->c1 != NULL) &&
5321 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5322 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5323 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5324 found = 1;
5325 }
5326 if ((cur->c2 != NULL) &&
5327 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5328 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5329 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5330 found = 1;
5331 }
5332 cur = cur->c2;
5333 }
5334 if (found)
5335 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5336 }
Owen Taylor3473f882001-02-23 17:55:21 +00005337 NEXT;
5338 }
5339 return(ret);
5340}
5341
5342/**
5343 * xmlParseElementContentDecl:
5344 * @ctxt: an XML parser context
5345 * @name: the name of the element being defined.
5346 * @result: the Element Content pointer will be stored here if any
5347 *
5348 * parse the declaration for an Element content either Mixed or Children,
5349 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5350 *
5351 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5352 *
5353 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5354 */
5355
5356int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005357xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005358 xmlElementContentPtr *result) {
5359
5360 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005361 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005362 int res;
5363
5364 *result = NULL;
5365
5366 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005367 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005368 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005369 return(-1);
5370 }
5371 NEXT;
5372 GROW;
5373 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005374 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005375 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005376 res = XML_ELEMENT_TYPE_MIXED;
5377 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005378 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005379 res = XML_ELEMENT_TYPE_ELEMENT;
5380 }
Owen Taylor3473f882001-02-23 17:55:21 +00005381 SKIP_BLANKS;
5382 *result = tree;
5383 return(res);
5384}
5385
5386/**
5387 * xmlParseElementDecl:
5388 * @ctxt: an XML parser context
5389 *
5390 * parse an Element declaration.
5391 *
5392 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5393 *
5394 * [ VC: Unique Element Type Declaration ]
5395 * No element type may be declared more than once
5396 *
5397 * Returns the type of the element, or -1 in case of error
5398 */
5399int
5400xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005401 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005402 int ret = -1;
5403 xmlElementContentPtr content = NULL;
5404
Daniel Veillard4c778d82005-01-23 17:37:44 +00005405 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005406 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005407 xmlParserInputPtr input = ctxt->input;
5408
5409 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005410 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005411 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5412 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005413 }
5414 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005415 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005416 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005417 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5418 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005419 return(-1);
5420 }
5421 while ((RAW == 0) && (ctxt->inputNr > 1))
5422 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005423 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5425 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005426 }
5427 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005428 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005429 SKIP(5);
5430 /*
5431 * Element must always be empty.
5432 */
5433 ret = XML_ELEMENT_TYPE_EMPTY;
5434 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5435 (NXT(2) == 'Y')) {
5436 SKIP(3);
5437 /*
5438 * Element is a generic container.
5439 */
5440 ret = XML_ELEMENT_TYPE_ANY;
5441 } else if (RAW == '(') {
5442 ret = xmlParseElementContentDecl(ctxt, name, &content);
5443 } else {
5444 /*
5445 * [ WFC: PEs in Internal Subset ] error handling.
5446 */
5447 if ((RAW == '%') && (ctxt->external == 0) &&
5448 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005449 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005450 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005451 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005452 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005453 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5454 }
Owen Taylor3473f882001-02-23 17:55:21 +00005455 return(-1);
5456 }
5457
5458 SKIP_BLANKS;
5459 /*
5460 * Pop-up of finished entities.
5461 */
5462 while ((RAW == 0) && (ctxt->inputNr > 1))
5463 xmlPopInput(ctxt);
5464 SKIP_BLANKS;
5465
5466 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005467 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005468 if (content != NULL) {
5469 xmlFreeDocElementContent(ctxt->myDoc, content);
5470 }
Owen Taylor3473f882001-02-23 17:55:21 +00005471 } else {
5472 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5474 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005475 }
5476
5477 NEXT;
5478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005479 (ctxt->sax->elementDecl != NULL)) {
5480 if (content != NULL)
5481 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005482 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5483 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005484 if ((content != NULL) && (content->parent == NULL)) {
5485 /*
5486 * this is a trick: if xmlAddElementDecl is called,
5487 * instead of copying the full tree it is plugged directly
5488 * if called from the parser. Avoid duplicating the
5489 * interfaces or change the API/ABI
5490 */
5491 xmlFreeDocElementContent(ctxt->myDoc, content);
5492 }
5493 } else if (content != NULL) {
5494 xmlFreeDocElementContent(ctxt->myDoc, content);
5495 }
Owen Taylor3473f882001-02-23 17:55:21 +00005496 }
Owen Taylor3473f882001-02-23 17:55:21 +00005497 }
5498 return(ret);
5499}
5500
5501/**
Owen Taylor3473f882001-02-23 17:55:21 +00005502 * xmlParseConditionalSections
5503 * @ctxt: an XML parser context
5504 *
5505 * [61] conditionalSect ::= includeSect | ignoreSect
5506 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5507 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5508 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5509 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5510 */
5511
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005512static void
Owen Taylor3473f882001-02-23 17:55:21 +00005513xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5514 SKIP(3);
5515 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005516 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005517 SKIP(7);
5518 SKIP_BLANKS;
5519 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005520 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005521 } else {
5522 NEXT;
5523 }
5524 if (xmlParserDebugEntities) {
5525 if ((ctxt->input != NULL) && (ctxt->input->filename))
5526 xmlGenericError(xmlGenericErrorContext,
5527 "%s(%d): ", ctxt->input->filename,
5528 ctxt->input->line);
5529 xmlGenericError(xmlGenericErrorContext,
5530 "Entering INCLUDE Conditional Section\n");
5531 }
5532
5533 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5534 (NXT(2) != '>'))) {
5535 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005536 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005537
5538 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5539 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005540 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005541 NEXT;
5542 } else if (RAW == '%') {
5543 xmlParsePEReference(ctxt);
5544 } else
5545 xmlParseMarkupDecl(ctxt);
5546
5547 /*
5548 * Pop-up of finished entities.
5549 */
5550 while ((RAW == 0) && (ctxt->inputNr > 1))
5551 xmlPopInput(ctxt);
5552
Daniel Veillardfdc91562002-07-01 21:52:03 +00005553 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005554 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005555 break;
5556 }
5557 }
5558 if (xmlParserDebugEntities) {
5559 if ((ctxt->input != NULL) && (ctxt->input->filename))
5560 xmlGenericError(xmlGenericErrorContext,
5561 "%s(%d): ", ctxt->input->filename,
5562 ctxt->input->line);
5563 xmlGenericError(xmlGenericErrorContext,
5564 "Leaving INCLUDE Conditional Section\n");
5565 }
5566
Daniel Veillarda07050d2003-10-19 14:46:32 +00005567 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005568 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005569 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005570 int depth = 0;
5571
5572 SKIP(6);
5573 SKIP_BLANKS;
5574 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005575 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005576 } else {
5577 NEXT;
5578 }
5579 if (xmlParserDebugEntities) {
5580 if ((ctxt->input != NULL) && (ctxt->input->filename))
5581 xmlGenericError(xmlGenericErrorContext,
5582 "%s(%d): ", ctxt->input->filename,
5583 ctxt->input->line);
5584 xmlGenericError(xmlGenericErrorContext,
5585 "Entering IGNORE Conditional Section\n");
5586 }
5587
5588 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005589 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005590 * But disable SAX event generating DTD building in the meantime
5591 */
5592 state = ctxt->disableSAX;
5593 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005594 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005595 ctxt->instate = XML_PARSER_IGNORE;
5596
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005597 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005598 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5599 depth++;
5600 SKIP(3);
5601 continue;
5602 }
5603 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5604 if (--depth >= 0) SKIP(3);
5605 continue;
5606 }
5607 NEXT;
5608 continue;
5609 }
5610
5611 ctxt->disableSAX = state;
5612 ctxt->instate = instate;
5613
5614 if (xmlParserDebugEntities) {
5615 if ((ctxt->input != NULL) && (ctxt->input->filename))
5616 xmlGenericError(xmlGenericErrorContext,
5617 "%s(%d): ", ctxt->input->filename,
5618 ctxt->input->line);
5619 xmlGenericError(xmlGenericErrorContext,
5620 "Leaving IGNORE Conditional Section\n");
5621 }
5622
5623 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005624 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005625 }
5626
5627 if (RAW == 0)
5628 SHRINK;
5629
5630 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005631 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005632 } else {
5633 SKIP(3);
5634 }
5635}
5636
5637/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005638 * xmlParseMarkupDecl:
5639 * @ctxt: an XML parser context
5640 *
5641 * parse Markup declarations
5642 *
5643 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5644 * NotationDecl | PI | Comment
5645 *
5646 * [ VC: Proper Declaration/PE Nesting ]
5647 * Parameter-entity replacement text must be properly nested with
5648 * markup declarations. That is to say, if either the first character
5649 * or the last character of a markup declaration (markupdecl above) is
5650 * contained in the replacement text for a parameter-entity reference,
5651 * both must be contained in the same replacement text.
5652 *
5653 * [ WFC: PEs in Internal Subset ]
5654 * In the internal DTD subset, parameter-entity references can occur
5655 * only where markup declarations can occur, not within markup declarations.
5656 * (This does not apply to references that occur in external parameter
5657 * entities or to the external subset.)
5658 */
5659void
5660xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5661 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005662 if (CUR == '<') {
5663 if (NXT(1) == '!') {
5664 switch (NXT(2)) {
5665 case 'E':
5666 if (NXT(3) == 'L')
5667 xmlParseElementDecl(ctxt);
5668 else if (NXT(3) == 'N')
5669 xmlParseEntityDecl(ctxt);
5670 break;
5671 case 'A':
5672 xmlParseAttributeListDecl(ctxt);
5673 break;
5674 case 'N':
5675 xmlParseNotationDecl(ctxt);
5676 break;
5677 case '-':
5678 xmlParseComment(ctxt);
5679 break;
5680 default:
5681 /* there is an error but it will be detected later */
5682 break;
5683 }
5684 } else if (NXT(1) == '?') {
5685 xmlParsePI(ctxt);
5686 }
5687 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005688 /*
5689 * This is only for internal subset. On external entities,
5690 * the replacement is done before parsing stage
5691 */
5692 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5693 xmlParsePEReference(ctxt);
5694
5695 /*
5696 * Conditional sections are allowed from entities included
5697 * by PE References in the internal subset.
5698 */
5699 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5700 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5701 xmlParseConditionalSections(ctxt);
5702 }
5703 }
5704
5705 ctxt->instate = XML_PARSER_DTD;
5706}
5707
5708/**
5709 * xmlParseTextDecl:
5710 * @ctxt: an XML parser context
5711 *
5712 * parse an XML declaration header for external entities
5713 *
5714 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5715 *
5716 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5717 */
5718
5719void
5720xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5721 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005722 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005723
5724 /*
5725 * We know that '<?xml' is here.
5726 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005727 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005728 SKIP(5);
5729 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005730 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005731 return;
5732 }
5733
William M. Brack76e95df2003-10-18 16:20:14 +00005734 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5736 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005737 }
5738 SKIP_BLANKS;
5739
5740 /*
5741 * We may have the VersionInfo here.
5742 */
5743 version = xmlParseVersionInfo(ctxt);
5744 if (version == NULL)
5745 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005746 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005747 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5749 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005750 }
5751 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005752 ctxt->input->version = version;
5753
5754 /*
5755 * We must have the encoding declaration
5756 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005757 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005758 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5759 /*
5760 * The XML REC instructs us to stop parsing right here
5761 */
5762 return;
5763 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005764 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5765 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5766 "Missing encoding in text declaration\n");
5767 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005768
5769 SKIP_BLANKS;
5770 if ((RAW == '?') && (NXT(1) == '>')) {
5771 SKIP(2);
5772 } else if (RAW == '>') {
5773 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005774 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005775 NEXT;
5776 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005777 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005778 MOVETO_ENDTAG(CUR_PTR);
5779 NEXT;
5780 }
5781}
5782
5783/**
Owen Taylor3473f882001-02-23 17:55:21 +00005784 * xmlParseExternalSubset:
5785 * @ctxt: an XML parser context
5786 * @ExternalID: the external identifier
5787 * @SystemID: the system identifier (or URL)
5788 *
5789 * parse Markup declarations from an external subset
5790 *
5791 * [30] extSubset ::= textDecl? extSubsetDecl
5792 *
5793 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5794 */
5795void
5796xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5797 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005798 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005800 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005801 xmlParseTextDecl(ctxt);
5802 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5803 /*
5804 * The XML REC instructs us to stop parsing right here
5805 */
5806 ctxt->instate = XML_PARSER_EOF;
5807 return;
5808 }
5809 }
5810 if (ctxt->myDoc == NULL) {
5811 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5812 }
5813 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5814 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5815
5816 ctxt->instate = XML_PARSER_DTD;
5817 ctxt->external = 1;
5818 while (((RAW == '<') && (NXT(1) == '?')) ||
5819 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005820 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005821 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005822 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005823
5824 GROW;
5825 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5826 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005827 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005828 NEXT;
5829 } else if (RAW == '%') {
5830 xmlParsePEReference(ctxt);
5831 } else
5832 xmlParseMarkupDecl(ctxt);
5833
5834 /*
5835 * Pop-up of finished entities.
5836 */
5837 while ((RAW == 0) && (ctxt->inputNr > 1))
5838 xmlPopInput(ctxt);
5839
Daniel Veillardfdc91562002-07-01 21:52:03 +00005840 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005841 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005842 break;
5843 }
5844 }
5845
5846 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005847 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005848 }
5849
5850}
5851
5852/**
5853 * xmlParseReference:
5854 * @ctxt: an XML parser context
5855 *
5856 * parse and handle entity references in content, depending on the SAX
5857 * interface, this may end-up in a call to character() if this is a
5858 * CharRef, a predefined entity, if there is no reference() callback.
5859 * or if the parser was asked to switch to that mode.
5860 *
5861 * [67] Reference ::= EntityRef | CharRef
5862 */
5863void
5864xmlParseReference(xmlParserCtxtPtr ctxt) {
5865 xmlEntityPtr ent;
5866 xmlChar *val;
5867 if (RAW != '&') return;
5868
5869 if (NXT(1) == '#') {
5870 int i = 0;
5871 xmlChar out[10];
5872 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005873 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005874
5875 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5876 /*
5877 * So we are using non-UTF-8 buffers
5878 * Check that the char fit on 8bits, if not
5879 * generate a CharRef.
5880 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005881 if (value <= 0xFF) {
5882 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 out[1] = 0;
5884 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5885 (!ctxt->disableSAX))
5886 ctxt->sax->characters(ctxt->userData, out, 1);
5887 } else {
5888 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005889 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005891 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5893 (!ctxt->disableSAX))
5894 ctxt->sax->reference(ctxt->userData, out);
5895 }
5896 } else {
5897 /*
5898 * Just encode the value in UTF-8
5899 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005900 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005901 out[i] = 0;
5902 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5903 (!ctxt->disableSAX))
5904 ctxt->sax->characters(ctxt->userData, out, i);
5905 }
5906 } else {
5907 ent = xmlParseEntityRef(ctxt);
5908 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005909 if (!ctxt->wellFormed)
5910 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005911 if ((ent->name != NULL) &&
5912 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5913 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005914 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005915
5916
5917 /*
5918 * The first reference to the entity trigger a parsing phase
5919 * where the ent->children is filled with the result from
5920 * the parsing.
5921 */
5922 if (ent->children == NULL) {
5923 xmlChar *value;
5924 value = ent->content;
5925
5926 /*
5927 * Check that this entity is well formed
5928 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005929 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005930 (value[1] == 0) && (value[0] == '<') &&
5931 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5932 /*
5933 * DONE: get definite answer on this !!!
5934 * Lots of entity decls are used to declare a single
5935 * char
5936 * <!ENTITY lt "<">
5937 * Which seems to be valid since
5938 * 2.4: The ampersand character (&) and the left angle
5939 * bracket (<) may appear in their literal form only
5940 * when used ... They are also legal within the literal
5941 * entity value of an internal entity declaration;i
5942 * see "4.3.2 Well-Formed Parsed Entities".
5943 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5944 * Looking at the OASIS test suite and James Clark
5945 * tests, this is broken. However the XML REC uses
5946 * it. Is the XML REC not well-formed ????
5947 * This is a hack to avoid this problem
5948 *
5949 * ANSWER: since lt gt amp .. are already defined,
5950 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005951 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005952 * is lousy but acceptable.
5953 */
5954 list = xmlNewDocText(ctxt->myDoc, value);
5955 if (list != NULL) {
5956 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5957 (ent->children == NULL)) {
5958 ent->children = list;
5959 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005960 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005961 list->parent = (xmlNodePtr) ent;
5962 } else {
5963 xmlFreeNodeList(list);
5964 }
5965 } else if (list != NULL) {
5966 xmlFreeNodeList(list);
5967 }
5968 } else {
5969 /*
5970 * 4.3.2: An internal general parsed entity is well-formed
5971 * if its replacement text matches the production labeled
5972 * content.
5973 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005974
5975 void *user_data;
5976 /*
5977 * This is a bit hackish but this seems the best
5978 * way to make sure both SAX and DOM entity support
5979 * behaves okay.
5980 */
5981 if (ctxt->userData == ctxt)
5982 user_data = NULL;
5983 else
5984 user_data = ctxt->userData;
5985
Owen Taylor3473f882001-02-23 17:55:21 +00005986 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5987 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005988 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5989 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005990 ctxt->depth--;
5991 } else if (ent->etype ==
5992 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5993 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005994 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005995 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005996 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005997 ctxt->depth--;
5998 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005999 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006000 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6001 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006002 }
6003 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006004 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006005 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006006 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006007 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6008 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006009 (ent->children == NULL)) {
6010 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006011 if (ctxt->replaceEntities) {
6012 /*
6013 * Prune it directly in the generated document
6014 * except for single text nodes.
6015 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006016 if (((list->type == XML_TEXT_NODE) &&
6017 (list->next == NULL)) ||
6018 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006019 list->parent = (xmlNodePtr) ent;
6020 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006021 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006022 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006023 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006024 while (list != NULL) {
6025 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006026 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006027 if (list->next == NULL)
6028 ent->last = list;
6029 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006030 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006031 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006032#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006033 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6034 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006035#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006036 }
6037 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006038 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006039 while (list != NULL) {
6040 list->parent = (xmlNodePtr) ent;
6041 if (list->next == NULL)
6042 ent->last = list;
6043 list = list->next;
6044 }
Owen Taylor3473f882001-02-23 17:55:21 +00006045 }
6046 } else {
6047 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006048 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006049 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006050 } else if ((ret != XML_ERR_OK) &&
6051 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006052 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006053 } else if (list != NULL) {
6054 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006055 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006056 }
6057 }
6058 }
6059 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6060 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6061 /*
6062 * Create a node.
6063 */
6064 ctxt->sax->reference(ctxt->userData, ent->name);
6065 return;
6066 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006067 /*
6068 * There is a problem on the handling of _private for entities
6069 * (bug 155816): Should we copy the content of the field from
6070 * the entity (possibly overwriting some value set by the user
6071 * when a copy is created), should we leave it alone, or should
6072 * we try to take care of different situations? The problem
6073 * is exacerbated by the usage of this field by the xmlReader.
6074 * To fix this bug, we look at _private on the created node
6075 * and, if it's NULL, we copy in whatever was in the entity.
6076 * If it's not NULL we leave it alone. This is somewhat of a
6077 * hack - maybe we should have further tests to determine
6078 * what to do.
6079 */
Owen Taylor3473f882001-02-23 17:55:21 +00006080 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6081 /*
6082 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006083 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006084 * In the first occurrence list contains the replacement.
6085 * progressive == 2 means we are operating on the Reader
6086 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006087 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006088 if (((list == NULL) && (ent->owner == 0)) ||
6089 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006090 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006091
6092 /*
6093 * when operating on a reader, the entities definitions
6094 * are always owning the entities subtree.
6095 if (ctxt->parseMode == XML_PARSE_READER)
6096 ent->owner = 1;
6097 */
6098
Daniel Veillard62f313b2001-07-04 19:49:14 +00006099 cur = ent->children;
6100 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006101 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006102 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006103 if (nw->_private == NULL)
6104 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006105 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006106 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006107 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006108 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006109 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006110 if (cur == ent->last) {
6111 /*
6112 * needed to detect some strange empty
6113 * node cases in the reader tests
6114 */
6115 if ((ctxt->parseMode == XML_PARSE_READER) &&
6116 (nw->type == XML_ELEMENT_NODE) &&
6117 (nw->children == NULL))
6118 nw->extra = 1;
6119
Daniel Veillard62f313b2001-07-04 19:49:14 +00006120 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006121 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006122 cur = cur->next;
6123 }
Daniel Veillard81273902003-09-30 00:43:48 +00006124#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006125 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006126 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006127#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006128 } else if (list == NULL) {
6129 xmlNodePtr nw = NULL, cur, next, last,
6130 firstChild = NULL;
6131 /*
6132 * Copy the entity child list and make it the new
6133 * entity child list. The goal is to make sure any
6134 * ID or REF referenced will be the one from the
6135 * document content and not the entity copy.
6136 */
6137 cur = ent->children;
6138 ent->children = NULL;
6139 last = ent->last;
6140 ent->last = NULL;
6141 while (cur != NULL) {
6142 next = cur->next;
6143 cur->next = NULL;
6144 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006145 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006146 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006147 if (nw->_private == NULL)
6148 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006149 if (firstChild == NULL){
6150 firstChild = cur;
6151 }
6152 xmlAddChild((xmlNodePtr) ent, nw);
6153 xmlAddChild(ctxt->node, cur);
6154 }
6155 if (cur == last)
6156 break;
6157 cur = next;
6158 }
6159 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006160#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006161 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6162 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006163#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006164 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006165 const xmlChar *nbktext;
6166
Daniel Veillard62f313b2001-07-04 19:49:14 +00006167 /*
6168 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006169 * node with a possible previous text one which
6170 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006171 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006172 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6173 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006174 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006175 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006176 if ((ent->last != ent->children) &&
6177 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006178 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006179 xmlAddChildList(ctxt->node, ent->children);
6180 }
6181
Owen Taylor3473f882001-02-23 17:55:21 +00006182 /*
6183 * This is to avoid a nasty side effect, see
6184 * characters() in SAX.c
6185 */
6186 ctxt->nodemem = 0;
6187 ctxt->nodelen = 0;
6188 return;
6189 } else {
6190 /*
6191 * Probably running in SAX mode
6192 */
6193 xmlParserInputPtr input;
6194
6195 input = xmlNewEntityInputStream(ctxt, ent);
6196 xmlPushInput(ctxt, input);
6197 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006198 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6199 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006200 xmlParseTextDecl(ctxt);
6201 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6202 /*
6203 * The XML REC instructs us to stop parsing right here
6204 */
6205 ctxt->instate = XML_PARSER_EOF;
6206 return;
6207 }
6208 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006209 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6210 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006211 }
6212 }
6213 return;
6214 }
6215 }
6216 } else {
6217 val = ent->content;
6218 if (val == NULL) return;
6219 /*
6220 * inline the entity.
6221 */
6222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6223 (!ctxt->disableSAX))
6224 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6225 }
6226 }
6227}
6228
6229/**
6230 * xmlParseEntityRef:
6231 * @ctxt: an XML parser context
6232 *
6233 * parse ENTITY references declarations
6234 *
6235 * [68] EntityRef ::= '&' Name ';'
6236 *
6237 * [ WFC: Entity Declared ]
6238 * In a document without any DTD, a document with only an internal DTD
6239 * subset which contains no parameter entity references, or a document
6240 * with "standalone='yes'", the Name given in the entity reference
6241 * must match that in an entity declaration, except that well-formed
6242 * documents need not declare any of the following entities: amp, lt,
6243 * gt, apos, quot. The declaration of a parameter entity must precede
6244 * any reference to it. Similarly, the declaration of a general entity
6245 * must precede any reference to it which appears in a default value in an
6246 * attribute-list declaration. Note that if entities are declared in the
6247 * external subset or in external parameter entities, a non-validating
6248 * processor is not obligated to read and process their declarations;
6249 * for such documents, the rule that an entity must be declared is a
6250 * well-formedness constraint only if standalone='yes'.
6251 *
6252 * [ WFC: Parsed Entity ]
6253 * An entity reference must not contain the name of an unparsed entity
6254 *
6255 * Returns the xmlEntityPtr if found, or NULL otherwise.
6256 */
6257xmlEntityPtr
6258xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006259 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006260 xmlEntityPtr ent = NULL;
6261
6262 GROW;
6263
6264 if (RAW == '&') {
6265 NEXT;
6266 name = xmlParseName(ctxt);
6267 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6269 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006270 } else {
6271 if (RAW == ';') {
6272 NEXT;
6273 /*
6274 * Ask first SAX for entity resolution, otherwise try the
6275 * predefined set.
6276 */
6277 if (ctxt->sax != NULL) {
6278 if (ctxt->sax->getEntity != NULL)
6279 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006280 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006281 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006282 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6283 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006284 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006285 }
Owen Taylor3473f882001-02-23 17:55:21 +00006286 }
6287 /*
6288 * [ WFC: Entity Declared ]
6289 * In a document without any DTD, a document with only an
6290 * internal DTD subset which contains no parameter entity
6291 * references, or a document with "standalone='yes'", the
6292 * Name given in the entity reference must match that in an
6293 * entity declaration, except that well-formed documents
6294 * need not declare any of the following entities: amp, lt,
6295 * gt, apos, quot.
6296 * The declaration of a parameter entity must precede any
6297 * reference to it.
6298 * Similarly, the declaration of a general entity must
6299 * precede any reference to it which appears in a default
6300 * value in an attribute-list declaration. Note that if
6301 * entities are declared in the external subset or in
6302 * external parameter entities, a non-validating processor
6303 * is not obligated to read and process their declarations;
6304 * for such documents, the rule that an entity must be
6305 * declared is a well-formedness constraint only if
6306 * standalone='yes'.
6307 */
6308 if (ent == NULL) {
6309 if ((ctxt->standalone == 1) ||
6310 ((ctxt->hasExternalSubset == 0) &&
6311 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006312 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006313 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006314 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006315 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006316 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006317 if ((ctxt->inSubset == 0) &&
6318 (ctxt->sax != NULL) &&
6319 (ctxt->sax->reference != NULL)) {
6320 ctxt->sax->reference(ctxt, name);
6321 }
Owen Taylor3473f882001-02-23 17:55:21 +00006322 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006323 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006324 }
6325
6326 /*
6327 * [ WFC: Parsed Entity ]
6328 * An entity reference must not contain the name of an
6329 * unparsed entity
6330 */
6331 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006333 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006334 }
6335
6336 /*
6337 * [ WFC: No External Entity References ]
6338 * Attribute values cannot contain direct or indirect
6339 * entity references to external entities.
6340 */
6341 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6342 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006343 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6344 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006345 }
6346 /*
6347 * [ WFC: No < in Attribute Values ]
6348 * The replacement text of any entity referred to directly or
6349 * indirectly in an attribute value (other than "&lt;") must
6350 * not contain a <.
6351 */
6352 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6353 (ent != NULL) &&
6354 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6355 (ent->content != NULL) &&
6356 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006357 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006358 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006359 }
6360
6361 /*
6362 * Internal check, no parameter entities here ...
6363 */
6364 else {
6365 switch (ent->etype) {
6366 case XML_INTERNAL_PARAMETER_ENTITY:
6367 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006368 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6369 "Attempt to reference the parameter entity '%s'\n",
6370 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006371 break;
6372 default:
6373 break;
6374 }
6375 }
6376
6377 /*
6378 * [ WFC: No Recursion ]
6379 * A parsed entity must not contain a recursive reference
6380 * to itself, either directly or indirectly.
6381 * Done somewhere else
6382 */
6383
6384 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006385 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006386 }
Owen Taylor3473f882001-02-23 17:55:21 +00006387 }
6388 }
6389 return(ent);
6390}
6391
6392/**
6393 * xmlParseStringEntityRef:
6394 * @ctxt: an XML parser context
6395 * @str: a pointer to an index in the string
6396 *
6397 * parse ENTITY references declarations, but this version parses it from
6398 * a string value.
6399 *
6400 * [68] EntityRef ::= '&' Name ';'
6401 *
6402 * [ WFC: Entity Declared ]
6403 * In a document without any DTD, a document with only an internal DTD
6404 * subset which contains no parameter entity references, or a document
6405 * with "standalone='yes'", the Name given in the entity reference
6406 * must match that in an entity declaration, except that well-formed
6407 * documents need not declare any of the following entities: amp, lt,
6408 * gt, apos, quot. The declaration of a parameter entity must precede
6409 * any reference to it. Similarly, the declaration of a general entity
6410 * must precede any reference to it which appears in a default value in an
6411 * attribute-list declaration. Note that if entities are declared in the
6412 * external subset or in external parameter entities, a non-validating
6413 * processor is not obligated to read and process their declarations;
6414 * for such documents, the rule that an entity must be declared is a
6415 * well-formedness constraint only if standalone='yes'.
6416 *
6417 * [ WFC: Parsed Entity ]
6418 * An entity reference must not contain the name of an unparsed entity
6419 *
6420 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6421 * is updated to the current location in the string.
6422 */
6423xmlEntityPtr
6424xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6425 xmlChar *name;
6426 const xmlChar *ptr;
6427 xmlChar cur;
6428 xmlEntityPtr ent = NULL;
6429
6430 if ((str == NULL) || (*str == NULL))
6431 return(NULL);
6432 ptr = *str;
6433 cur = *ptr;
6434 if (cur == '&') {
6435 ptr++;
6436 cur = *ptr;
6437 name = xmlParseStringName(ctxt, &ptr);
6438 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006439 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6440 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006441 } else {
6442 if (*ptr == ';') {
6443 ptr++;
6444 /*
6445 * Ask first SAX for entity resolution, otherwise try the
6446 * predefined set.
6447 */
6448 if (ctxt->sax != NULL) {
6449 if (ctxt->sax->getEntity != NULL)
6450 ent = ctxt->sax->getEntity(ctxt->userData, name);
6451 if (ent == NULL)
6452 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006453 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006454 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006455 }
Owen Taylor3473f882001-02-23 17:55:21 +00006456 }
6457 /*
6458 * [ WFC: Entity Declared ]
6459 * In a document without any DTD, a document with only an
6460 * internal DTD subset which contains no parameter entity
6461 * references, or a document with "standalone='yes'", the
6462 * Name given in the entity reference must match that in an
6463 * entity declaration, except that well-formed documents
6464 * need not declare any of the following entities: amp, lt,
6465 * gt, apos, quot.
6466 * The declaration of a parameter entity must precede any
6467 * reference to it.
6468 * Similarly, the declaration of a general entity must
6469 * precede any reference to it which appears in a default
6470 * value in an attribute-list declaration. Note that if
6471 * entities are declared in the external subset or in
6472 * external parameter entities, a non-validating processor
6473 * is not obligated to read and process their declarations;
6474 * for such documents, the rule that an entity must be
6475 * declared is a well-formedness constraint only if
6476 * standalone='yes'.
6477 */
6478 if (ent == NULL) {
6479 if ((ctxt->standalone == 1) ||
6480 ((ctxt->hasExternalSubset == 0) &&
6481 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006482 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006483 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006484 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006485 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006486 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006487 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006488 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006489 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006490 }
6491
6492 /*
6493 * [ WFC: Parsed Entity ]
6494 * An entity reference must not contain the name of an
6495 * unparsed entity
6496 */
6497 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006498 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006499 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006500 }
6501
6502 /*
6503 * [ WFC: No External Entity References ]
6504 * Attribute values cannot contain direct or indirect
6505 * entity references to external entities.
6506 */
6507 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6508 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006509 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006510 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006511 }
6512 /*
6513 * [ WFC: No < in Attribute Values ]
6514 * The replacement text of any entity referred to directly or
6515 * indirectly in an attribute value (other than "&lt;") must
6516 * not contain a <.
6517 */
6518 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6519 (ent != NULL) &&
6520 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6521 (ent->content != NULL) &&
6522 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006523 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6524 "'<' in entity '%s' is not allowed in attributes values\n",
6525 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006526 }
6527
6528 /*
6529 * Internal check, no parameter entities here ...
6530 */
6531 else {
6532 switch (ent->etype) {
6533 case XML_INTERNAL_PARAMETER_ENTITY:
6534 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006535 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6536 "Attempt to reference the parameter entity '%s'\n",
6537 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006538 break;
6539 default:
6540 break;
6541 }
6542 }
6543
6544 /*
6545 * [ WFC: No Recursion ]
6546 * A parsed entity must not contain a recursive reference
6547 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006548 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006549 */
6550
6551 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006552 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006553 }
6554 xmlFree(name);
6555 }
6556 }
6557 *str = ptr;
6558 return(ent);
6559}
6560
6561/**
6562 * xmlParsePEReference:
6563 * @ctxt: an XML parser context
6564 *
6565 * parse PEReference declarations
6566 * The entity content is handled directly by pushing it's content as
6567 * a new input stream.
6568 *
6569 * [69] PEReference ::= '%' Name ';'
6570 *
6571 * [ WFC: No Recursion ]
6572 * A parsed entity must not contain a recursive
6573 * reference to itself, either directly or indirectly.
6574 *
6575 * [ WFC: Entity Declared ]
6576 * In a document without any DTD, a document with only an internal DTD
6577 * subset which contains no parameter entity references, or a document
6578 * with "standalone='yes'", ... ... The declaration of a parameter
6579 * entity must precede any reference to it...
6580 *
6581 * [ VC: Entity Declared ]
6582 * In a document with an external subset or external parameter entities
6583 * with "standalone='no'", ... ... The declaration of a parameter entity
6584 * must precede any reference to it...
6585 *
6586 * [ WFC: In DTD ]
6587 * Parameter-entity references may only appear in the DTD.
6588 * NOTE: misleading but this is handled.
6589 */
6590void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006591xmlParsePEReference(xmlParserCtxtPtr ctxt)
6592{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006593 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006594 xmlEntityPtr entity = NULL;
6595 xmlParserInputPtr input;
6596
6597 if (RAW == '%') {
6598 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006599 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006600 if (name == NULL) {
6601 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6602 "xmlParsePEReference: no name\n");
6603 } else {
6604 if (RAW == ';') {
6605 NEXT;
6606 if ((ctxt->sax != NULL) &&
6607 (ctxt->sax->getParameterEntity != NULL))
6608 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6609 name);
6610 if (entity == NULL) {
6611 /*
6612 * [ WFC: Entity Declared ]
6613 * In a document without any DTD, a document with only an
6614 * internal DTD subset which contains no parameter entity
6615 * references, or a document with "standalone='yes'", ...
6616 * ... The declaration of a parameter entity must precede
6617 * any reference to it...
6618 */
6619 if ((ctxt->standalone == 1) ||
6620 ((ctxt->hasExternalSubset == 0) &&
6621 (ctxt->hasPErefs == 0))) {
6622 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6623 "PEReference: %%%s; not found\n",
6624 name);
6625 } else {
6626 /*
6627 * [ VC: Entity Declared ]
6628 * In a document with an external subset or external
6629 * parameter entities with "standalone='no'", ...
6630 * ... The declaration of a parameter entity must
6631 * precede any reference to it...
6632 */
6633 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6634 "PEReference: %%%s; not found\n",
6635 name, NULL);
6636 ctxt->valid = 0;
6637 }
6638 } else {
6639 /*
6640 * Internal checking in case the entity quest barfed
6641 */
6642 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6643 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6644 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6645 "Internal: %%%s; is not a parameter entity\n",
6646 name, NULL);
6647 } else if (ctxt->input->free != deallocblankswrapper) {
6648 input =
6649 xmlNewBlanksWrapperInputStream(ctxt, entity);
6650 xmlPushInput(ctxt, input);
6651 } else {
6652 /*
6653 * TODO !!!
6654 * handle the extra spaces added before and after
6655 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6656 */
6657 input = xmlNewEntityInputStream(ctxt, entity);
6658 xmlPushInput(ctxt, input);
6659 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006660 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006661 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006662 xmlParseTextDecl(ctxt);
6663 if (ctxt->errNo ==
6664 XML_ERR_UNSUPPORTED_ENCODING) {
6665 /*
6666 * The XML REC instructs us to stop parsing
6667 * right here
6668 */
6669 ctxt->instate = XML_PARSER_EOF;
6670 return;
6671 }
6672 }
6673 }
6674 }
6675 ctxt->hasPErefs = 1;
6676 } else {
6677 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6678 }
6679 }
Owen Taylor3473f882001-02-23 17:55:21 +00006680 }
6681}
6682
6683/**
6684 * xmlParseStringPEReference:
6685 * @ctxt: an XML parser context
6686 * @str: a pointer to an index in the string
6687 *
6688 * parse PEReference declarations
6689 *
6690 * [69] PEReference ::= '%' Name ';'
6691 *
6692 * [ WFC: No Recursion ]
6693 * A parsed entity must not contain a recursive
6694 * reference to itself, either directly or indirectly.
6695 *
6696 * [ WFC: Entity Declared ]
6697 * In a document without any DTD, a document with only an internal DTD
6698 * subset which contains no parameter entity references, or a document
6699 * with "standalone='yes'", ... ... The declaration of a parameter
6700 * entity must precede any reference to it...
6701 *
6702 * [ VC: Entity Declared ]
6703 * In a document with an external subset or external parameter entities
6704 * with "standalone='no'", ... ... The declaration of a parameter entity
6705 * must precede any reference to it...
6706 *
6707 * [ WFC: In DTD ]
6708 * Parameter-entity references may only appear in the DTD.
6709 * NOTE: misleading but this is handled.
6710 *
6711 * Returns the string of the entity content.
6712 * str is updated to the current value of the index
6713 */
6714xmlEntityPtr
6715xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6716 const xmlChar *ptr;
6717 xmlChar cur;
6718 xmlChar *name;
6719 xmlEntityPtr entity = NULL;
6720
6721 if ((str == NULL) || (*str == NULL)) return(NULL);
6722 ptr = *str;
6723 cur = *ptr;
6724 if (cur == '%') {
6725 ptr++;
6726 cur = *ptr;
6727 name = xmlParseStringName(ctxt, &ptr);
6728 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006729 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6730 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006731 } else {
6732 cur = *ptr;
6733 if (cur == ';') {
6734 ptr++;
6735 cur = *ptr;
6736 if ((ctxt->sax != NULL) &&
6737 (ctxt->sax->getParameterEntity != NULL))
6738 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6739 name);
6740 if (entity == NULL) {
6741 /*
6742 * [ WFC: Entity Declared ]
6743 * In a document without any DTD, a document with only an
6744 * internal DTD subset which contains no parameter entity
6745 * references, or a document with "standalone='yes'", ...
6746 * ... The declaration of a parameter entity must precede
6747 * any reference to it...
6748 */
6749 if ((ctxt->standalone == 1) ||
6750 ((ctxt->hasExternalSubset == 0) &&
6751 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006752 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006753 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006754 } else {
6755 /*
6756 * [ VC: Entity Declared ]
6757 * In a document with an external subset or external
6758 * parameter entities with "standalone='no'", ...
6759 * ... The declaration of a parameter entity must
6760 * precede any reference to it...
6761 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006762 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6763 "PEReference: %%%s; not found\n",
6764 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006765 ctxt->valid = 0;
6766 }
6767 } else {
6768 /*
6769 * Internal checking in case the entity quest barfed
6770 */
6771 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6772 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006773 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6774 "%%%s; is not a parameter entity\n",
6775 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006776 }
6777 }
6778 ctxt->hasPErefs = 1;
6779 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006780 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006781 }
6782 xmlFree(name);
6783 }
6784 }
6785 *str = ptr;
6786 return(entity);
6787}
6788
6789/**
6790 * xmlParseDocTypeDecl:
6791 * @ctxt: an XML parser context
6792 *
6793 * parse a DOCTYPE declaration
6794 *
6795 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6796 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6797 *
6798 * [ VC: Root Element Type ]
6799 * The Name in the document type declaration must match the element
6800 * type of the root element.
6801 */
6802
6803void
6804xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006805 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006806 xmlChar *ExternalID = NULL;
6807 xmlChar *URI = NULL;
6808
6809 /*
6810 * We know that '<!DOCTYPE' has been detected.
6811 */
6812 SKIP(9);
6813
6814 SKIP_BLANKS;
6815
6816 /*
6817 * Parse the DOCTYPE name.
6818 */
6819 name = xmlParseName(ctxt);
6820 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006821 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6822 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006823 }
6824 ctxt->intSubName = name;
6825
6826 SKIP_BLANKS;
6827
6828 /*
6829 * Check for SystemID and ExternalID
6830 */
6831 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6832
6833 if ((URI != NULL) || (ExternalID != NULL)) {
6834 ctxt->hasExternalSubset = 1;
6835 }
6836 ctxt->extSubURI = URI;
6837 ctxt->extSubSystem = ExternalID;
6838
6839 SKIP_BLANKS;
6840
6841 /*
6842 * Create and update the internal subset.
6843 */
6844 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6845 (!ctxt->disableSAX))
6846 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6847
6848 /*
6849 * Is there any internal subset declarations ?
6850 * they are handled separately in xmlParseInternalSubset()
6851 */
6852 if (RAW == '[')
6853 return;
6854
6855 /*
6856 * We should be at the end of the DOCTYPE declaration.
6857 */
6858 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006859 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006860 }
6861 NEXT;
6862}
6863
6864/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006865 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006866 * @ctxt: an XML parser context
6867 *
6868 * parse the internal subset declaration
6869 *
6870 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6871 */
6872
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006873static void
Owen Taylor3473f882001-02-23 17:55:21 +00006874xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6875 /*
6876 * Is there any DTD definition ?
6877 */
6878 if (RAW == '[') {
6879 ctxt->instate = XML_PARSER_DTD;
6880 NEXT;
6881 /*
6882 * Parse the succession of Markup declarations and
6883 * PEReferences.
6884 * Subsequence (markupdecl | PEReference | S)*
6885 */
6886 while (RAW != ']') {
6887 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006888 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006889
6890 SKIP_BLANKS;
6891 xmlParseMarkupDecl(ctxt);
6892 xmlParsePEReference(ctxt);
6893
6894 /*
6895 * Pop-up of finished entities.
6896 */
6897 while ((RAW == 0) && (ctxt->inputNr > 1))
6898 xmlPopInput(ctxt);
6899
6900 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006901 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006902 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006903 break;
6904 }
6905 }
6906 if (RAW == ']') {
6907 NEXT;
6908 SKIP_BLANKS;
6909 }
6910 }
6911
6912 /*
6913 * We should be at the end of the DOCTYPE declaration.
6914 */
6915 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006916 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006917 }
6918 NEXT;
6919}
6920
Daniel Veillard81273902003-09-30 00:43:48 +00006921#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006922/**
6923 * xmlParseAttribute:
6924 * @ctxt: an XML parser context
6925 * @value: a xmlChar ** used to store the value of the attribute
6926 *
6927 * parse an attribute
6928 *
6929 * [41] Attribute ::= Name Eq AttValue
6930 *
6931 * [ WFC: No External Entity References ]
6932 * Attribute values cannot contain direct or indirect entity references
6933 * to external entities.
6934 *
6935 * [ WFC: No < in Attribute Values ]
6936 * The replacement text of any entity referred to directly or indirectly in
6937 * an attribute value (other than "&lt;") must not contain a <.
6938 *
6939 * [ VC: Attribute Value Type ]
6940 * The attribute must have been declared; the value must be of the type
6941 * declared for it.
6942 *
6943 * [25] Eq ::= S? '=' S?
6944 *
6945 * With namespace:
6946 *
6947 * [NS 11] Attribute ::= QName Eq AttValue
6948 *
6949 * Also the case QName == xmlns:??? is handled independently as a namespace
6950 * definition.
6951 *
6952 * Returns the attribute name, and the value in *value.
6953 */
6954
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006955const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006956xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006957 const xmlChar *name;
6958 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006959
6960 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006961 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006962 name = xmlParseName(ctxt);
6963 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006964 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006965 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006966 return(NULL);
6967 }
6968
6969 /*
6970 * read the value
6971 */
6972 SKIP_BLANKS;
6973 if (RAW == '=') {
6974 NEXT;
6975 SKIP_BLANKS;
6976 val = xmlParseAttValue(ctxt);
6977 ctxt->instate = XML_PARSER_CONTENT;
6978 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006979 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006980 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006981 return(NULL);
6982 }
6983
6984 /*
6985 * Check that xml:lang conforms to the specification
6986 * No more registered as an error, just generate a warning now
6987 * since this was deprecated in XML second edition
6988 */
6989 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6990 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006991 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6992 "Malformed value for xml:lang : %s\n",
6993 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006994 }
6995 }
6996
6997 /*
6998 * Check that xml:space conforms to the specification
6999 */
7000 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7001 if (xmlStrEqual(val, BAD_CAST "default"))
7002 *(ctxt->space) = 0;
7003 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7004 *(ctxt->space) = 1;
7005 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007006 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007007"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007008 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007009 }
7010 }
7011
7012 *value = val;
7013 return(name);
7014}
7015
7016/**
7017 * xmlParseStartTag:
7018 * @ctxt: an XML parser context
7019 *
7020 * parse a start of tag either for rule element or
7021 * EmptyElement. In both case we don't parse the tag closing chars.
7022 *
7023 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7024 *
7025 * [ WFC: Unique Att Spec ]
7026 * No attribute name may appear more than once in the same start-tag or
7027 * empty-element tag.
7028 *
7029 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7030 *
7031 * [ WFC: Unique Att Spec ]
7032 * No attribute name may appear more than once in the same start-tag or
7033 * empty-element tag.
7034 *
7035 * With namespace:
7036 *
7037 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7038 *
7039 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7040 *
7041 * Returns the element name parsed
7042 */
7043
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007044const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007045xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007046 const xmlChar *name;
7047 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007048 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007049 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007050 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007051 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007052 int i;
7053
7054 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007055 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007056
7057 name = xmlParseName(ctxt);
7058 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007060 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007061 return(NULL);
7062 }
7063
7064 /*
7065 * Now parse the attributes, it ends up with the ending
7066 *
7067 * (S Attribute)* S?
7068 */
7069 SKIP_BLANKS;
7070 GROW;
7071
Daniel Veillard21a0f912001-02-25 19:54:14 +00007072 while ((RAW != '>') &&
7073 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007074 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007075 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007076 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007077
7078 attname = xmlParseAttribute(ctxt, &attvalue);
7079 if ((attname != NULL) && (attvalue != NULL)) {
7080 /*
7081 * [ WFC: Unique Att Spec ]
7082 * No attribute name may appear more than once in the same
7083 * start-tag or empty-element tag.
7084 */
7085 for (i = 0; i < nbatts;i += 2) {
7086 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007087 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007088 xmlFree(attvalue);
7089 goto failed;
7090 }
7091 }
Owen Taylor3473f882001-02-23 17:55:21 +00007092 /*
7093 * Add the pair to atts
7094 */
7095 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007096 maxatts = 22; /* allow for 10 attrs by default */
7097 atts = (const xmlChar **)
7098 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007099 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007100 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007101 if (attvalue != NULL)
7102 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007103 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007104 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007105 ctxt->atts = atts;
7106 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007107 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007108 const xmlChar **n;
7109
Owen Taylor3473f882001-02-23 17:55:21 +00007110 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007111 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007112 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007113 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007114 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007115 if (attvalue != NULL)
7116 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007117 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007118 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007119 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007120 ctxt->atts = atts;
7121 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007122 }
7123 atts[nbatts++] = attname;
7124 atts[nbatts++] = attvalue;
7125 atts[nbatts] = NULL;
7126 atts[nbatts + 1] = NULL;
7127 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007128 if (attvalue != NULL)
7129 xmlFree(attvalue);
7130 }
7131
7132failed:
7133
Daniel Veillard3772de32002-12-17 10:31:45 +00007134 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007135 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7136 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007137 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7139 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007140 }
7141 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007142 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7143 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007144 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7145 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007146 break;
7147 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007148 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007149 GROW;
7150 }
7151
7152 /*
7153 * SAX: Start of Element !
7154 */
7155 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007156 (!ctxt->disableSAX)) {
7157 if (nbatts > 0)
7158 ctxt->sax->startElement(ctxt->userData, name, atts);
7159 else
7160 ctxt->sax->startElement(ctxt->userData, name, NULL);
7161 }
Owen Taylor3473f882001-02-23 17:55:21 +00007162
7163 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007164 /* Free only the content strings */
7165 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007166 if (atts[i] != NULL)
7167 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007168 }
7169 return(name);
7170}
7171
7172/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007173 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007174 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007175 * @line: line of the start tag
7176 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007177 *
7178 * parse an end of tag
7179 *
7180 * [42] ETag ::= '</' Name S? '>'
7181 *
7182 * With namespace
7183 *
7184 * [NS 9] ETag ::= '</' QName S? '>'
7185 */
7186
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007187static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007188xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007189 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007190
7191 GROW;
7192 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007193 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007194 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007195 return;
7196 }
7197 SKIP(2);
7198
Daniel Veillard46de64e2002-05-29 08:21:33 +00007199 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007200
7201 /*
7202 * We should definitely be at the ending "S? '>'" part
7203 */
7204 GROW;
7205 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007206 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007207 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007208 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007209 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007210
7211 /*
7212 * [ WFC: Element Type Match ]
7213 * The Name in an element's end-tag must match the element type in the
7214 * start-tag.
7215 *
7216 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007217 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007218 if (name == NULL) name = BAD_CAST "unparseable";
7219 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007220 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007221 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007222 }
7223
7224 /*
7225 * SAX: End of Tag
7226 */
7227 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7228 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007229 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007230
Daniel Veillarde57ec792003-09-10 10:50:59 +00007231 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007232 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007233 return;
7234}
7235
7236/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007237 * xmlParseEndTag:
7238 * @ctxt: an XML parser context
7239 *
7240 * parse an end of tag
7241 *
7242 * [42] ETag ::= '</' Name S? '>'
7243 *
7244 * With namespace
7245 *
7246 * [NS 9] ETag ::= '</' QName S? '>'
7247 */
7248
7249void
7250xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007251 xmlParseEndTag1(ctxt, 0);
7252}
Daniel Veillard81273902003-09-30 00:43:48 +00007253#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007254
7255/************************************************************************
7256 * *
7257 * SAX 2 specific operations *
7258 * *
7259 ************************************************************************/
7260
7261static const xmlChar *
7262xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7263 int len = 0, l;
7264 int c;
7265 int count = 0;
7266
7267 /*
7268 * Handler for more complex cases
7269 */
7270 GROW;
7271 c = CUR_CHAR(l);
7272 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007273 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274 return(NULL);
7275 }
7276
7277 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007278 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007279 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007280 (IS_COMBINING(c)) ||
7281 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007282 if (count++ > 100) {
7283 count = 0;
7284 GROW;
7285 }
7286 len += l;
7287 NEXTL(l);
7288 c = CUR_CHAR(l);
7289 }
7290 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7291}
7292
7293/*
7294 * xmlGetNamespace:
7295 * @ctxt: an XML parser context
7296 * @prefix: the prefix to lookup
7297 *
7298 * Lookup the namespace name for the @prefix (which ca be NULL)
7299 * The prefix must come from the @ctxt->dict dictionnary
7300 *
7301 * Returns the namespace name or NULL if not bound
7302 */
7303static const xmlChar *
7304xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7305 int i;
7306
Daniel Veillarde57ec792003-09-10 10:50:59 +00007307 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007309 if (ctxt->nsTab[i] == prefix) {
7310 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7311 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007312 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007313 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007314 return(NULL);
7315}
7316
7317/**
7318 * xmlParseNCName:
7319 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007320 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007321 *
7322 * parse an XML name.
7323 *
7324 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7325 * CombiningChar | Extender
7326 *
7327 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7328 *
7329 * Returns the Name parsed or NULL
7330 */
7331
7332static const xmlChar *
7333xmlParseNCName(xmlParserCtxtPtr ctxt) {
7334 const xmlChar *in;
7335 const xmlChar *ret;
7336 int count = 0;
7337
7338 /*
7339 * Accelerator for simple ASCII names
7340 */
7341 in = ctxt->input->cur;
7342 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7343 ((*in >= 0x41) && (*in <= 0x5A)) ||
7344 (*in == '_')) {
7345 in++;
7346 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7347 ((*in >= 0x41) && (*in <= 0x5A)) ||
7348 ((*in >= 0x30) && (*in <= 0x39)) ||
7349 (*in == '_') || (*in == '-') ||
7350 (*in == '.'))
7351 in++;
7352 if ((*in > 0) && (*in < 0x80)) {
7353 count = in - ctxt->input->cur;
7354 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7355 ctxt->input->cur = in;
7356 ctxt->nbChars += count;
7357 ctxt->input->col += count;
7358 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007359 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007360 }
7361 return(ret);
7362 }
7363 }
7364 return(xmlParseNCNameComplex(ctxt));
7365}
7366
7367/**
7368 * xmlParseQName:
7369 * @ctxt: an XML parser context
7370 * @prefix: pointer to store the prefix part
7371 *
7372 * parse an XML Namespace QName
7373 *
7374 * [6] QName ::= (Prefix ':')? LocalPart
7375 * [7] Prefix ::= NCName
7376 * [8] LocalPart ::= NCName
7377 *
7378 * Returns the Name parsed or NULL
7379 */
7380
7381static const xmlChar *
7382xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7383 const xmlChar *l, *p;
7384
7385 GROW;
7386
7387 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007388 if (l == NULL) {
7389 if (CUR == ':') {
7390 l = xmlParseName(ctxt);
7391 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007392 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7393 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007394 *prefix = NULL;
7395 return(l);
7396 }
7397 }
7398 return(NULL);
7399 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 if (CUR == ':') {
7401 NEXT;
7402 p = l;
7403 l = xmlParseNCName(ctxt);
7404 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007405 xmlChar *tmp;
7406
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007407 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7408 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007409 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7410 p = xmlDictLookup(ctxt->dict, tmp, -1);
7411 if (tmp != NULL) xmlFree(tmp);
7412 *prefix = NULL;
7413 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007414 }
7415 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007416 xmlChar *tmp;
7417
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007418 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7419 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007420 NEXT;
7421 tmp = (xmlChar *) xmlParseName(ctxt);
7422 if (tmp != NULL) {
7423 tmp = xmlBuildQName(tmp, l, NULL, 0);
7424 l = xmlDictLookup(ctxt->dict, tmp, -1);
7425 if (tmp != NULL) xmlFree(tmp);
7426 *prefix = p;
7427 return(l);
7428 }
7429 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7430 l = xmlDictLookup(ctxt->dict, tmp, -1);
7431 if (tmp != NULL) xmlFree(tmp);
7432 *prefix = p;
7433 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007434 }
7435 *prefix = p;
7436 } else
7437 *prefix = NULL;
7438 return(l);
7439}
7440
7441/**
7442 * xmlParseQNameAndCompare:
7443 * @ctxt: an XML parser context
7444 * @name: the localname
7445 * @prefix: the prefix, if any.
7446 *
7447 * parse an XML name and compares for match
7448 * (specialized for endtag parsing)
7449 *
7450 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7451 * and the name for mismatch
7452 */
7453
7454static const xmlChar *
7455xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7456 xmlChar const *prefix) {
7457 const xmlChar *cmp = name;
7458 const xmlChar *in;
7459 const xmlChar *ret;
7460 const xmlChar *prefix2;
7461
7462 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7463
7464 GROW;
7465 in = ctxt->input->cur;
7466
7467 cmp = prefix;
7468 while (*in != 0 && *in == *cmp) {
7469 ++in;
7470 ++cmp;
7471 }
7472 if ((*cmp == 0) && (*in == ':')) {
7473 in++;
7474 cmp = name;
7475 while (*in != 0 && *in == *cmp) {
7476 ++in;
7477 ++cmp;
7478 }
William M. Brack76e95df2003-10-18 16:20:14 +00007479 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007480 /* success */
7481 ctxt->input->cur = in;
7482 return((const xmlChar*) 1);
7483 }
7484 }
7485 /*
7486 * all strings coms from the dictionary, equality can be done directly
7487 */
7488 ret = xmlParseQName (ctxt, &prefix2);
7489 if ((ret == name) && (prefix == prefix2))
7490 return((const xmlChar*) 1);
7491 return ret;
7492}
7493
7494/**
7495 * xmlParseAttValueInternal:
7496 * @ctxt: an XML parser context
7497 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007498 * @alloc: whether the attribute was reallocated as a new string
7499 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007500 *
7501 * parse a value for an attribute.
7502 * NOTE: if no normalization is needed, the routine will return pointers
7503 * directly from the data buffer.
7504 *
7505 * 3.3.3 Attribute-Value Normalization:
7506 * Before the value of an attribute is passed to the application or
7507 * checked for validity, the XML processor must normalize it as follows:
7508 * - a character reference is processed by appending the referenced
7509 * character to the attribute value
7510 * - an entity reference is processed by recursively processing the
7511 * replacement text of the entity
7512 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7513 * appending #x20 to the normalized value, except that only a single
7514 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7515 * parsed entity or the literal entity value of an internal parsed entity
7516 * - other characters are processed by appending them to the normalized value
7517 * If the declared value is not CDATA, then the XML processor must further
7518 * process the normalized attribute value by discarding any leading and
7519 * trailing space (#x20) characters, and by replacing sequences of space
7520 * (#x20) characters by a single space (#x20) character.
7521 * All attributes for which no declaration has been read should be treated
7522 * by a non-validating parser as if declared CDATA.
7523 *
7524 * Returns the AttValue parsed or NULL. The value has to be freed by the
7525 * caller if it was copied, this can be detected by val[*len] == 0.
7526 */
7527
7528static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007529xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7530 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007531{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007532 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007533 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007534 xmlChar *ret = NULL;
7535
7536 GROW;
7537 in = (xmlChar *) CUR_PTR;
7538 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007539 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007540 return (NULL);
7541 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007542 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007543
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007544 /*
7545 * try to handle in this routine the most common case where no
7546 * allocation of a new string is required and where content is
7547 * pure ASCII.
7548 */
7549 limit = *in++;
7550 end = ctxt->input->end;
7551 start = in;
7552 if (in >= end) {
7553 const xmlChar *oldbase = ctxt->input->base;
7554 GROW;
7555 if (oldbase != ctxt->input->base) {
7556 long delta = ctxt->input->base - oldbase;
7557 start = start + delta;
7558 in = in + delta;
7559 }
7560 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007561 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007562 if (normalize) {
7563 /*
7564 * Skip any leading spaces
7565 */
7566 while ((in < end) && (*in != limit) &&
7567 ((*in == 0x20) || (*in == 0x9) ||
7568 (*in == 0xA) || (*in == 0xD))) {
7569 in++;
7570 start = in;
7571 if (in >= end) {
7572 const xmlChar *oldbase = ctxt->input->base;
7573 GROW;
7574 if (oldbase != ctxt->input->base) {
7575 long delta = ctxt->input->base - oldbase;
7576 start = start + delta;
7577 in = in + delta;
7578 }
7579 end = ctxt->input->end;
7580 }
7581 }
7582 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7583 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7584 if ((*in++ == 0x20) && (*in == 0x20)) break;
7585 if (in >= end) {
7586 const xmlChar *oldbase = ctxt->input->base;
7587 GROW;
7588 if (oldbase != ctxt->input->base) {
7589 long delta = ctxt->input->base - oldbase;
7590 start = start + delta;
7591 in = in + delta;
7592 }
7593 end = ctxt->input->end;
7594 }
7595 }
7596 last = in;
7597 /*
7598 * skip the trailing blanks
7599 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007600 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007601 while ((in < end) && (*in != limit) &&
7602 ((*in == 0x20) || (*in == 0x9) ||
7603 (*in == 0xA) || (*in == 0xD))) {
7604 in++;
7605 if (in >= end) {
7606 const xmlChar *oldbase = ctxt->input->base;
7607 GROW;
7608 if (oldbase != ctxt->input->base) {
7609 long delta = ctxt->input->base - oldbase;
7610 start = start + delta;
7611 in = in + delta;
7612 last = last + delta;
7613 }
7614 end = ctxt->input->end;
7615 }
7616 }
7617 if (*in != limit) goto need_complex;
7618 } else {
7619 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7620 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7621 in++;
7622 if (in >= end) {
7623 const xmlChar *oldbase = ctxt->input->base;
7624 GROW;
7625 if (oldbase != ctxt->input->base) {
7626 long delta = ctxt->input->base - oldbase;
7627 start = start + delta;
7628 in = in + delta;
7629 }
7630 end = ctxt->input->end;
7631 }
7632 }
7633 last = in;
7634 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007635 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007636 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007637 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007638 *len = last - start;
7639 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007640 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007641 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007642 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007643 }
7644 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007645 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007646 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007647need_complex:
7648 if (alloc) *alloc = 1;
7649 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007650}
7651
7652/**
7653 * xmlParseAttribute2:
7654 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007655 * @pref: the element prefix
7656 * @elem: the element name
7657 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007658 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007659 * @len: an int * to save the length of the attribute
7660 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007661 *
7662 * parse an attribute in the new SAX2 framework.
7663 *
7664 * Returns the attribute name, and the value in *value, .
7665 */
7666
7667static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007668xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7669 const xmlChar *pref, const xmlChar *elem,
7670 const xmlChar **prefix, xmlChar **value,
7671 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007673 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007674 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007675
7676 *value = NULL;
7677 GROW;
7678 name = xmlParseQName(ctxt, prefix);
7679 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7681 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007682 return(NULL);
7683 }
7684
7685 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007686 * get the type if needed
7687 */
7688 if (ctxt->attsSpecial != NULL) {
7689 int type;
7690
7691 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7692 pref, elem, *prefix, name);
7693 if (type != 0) normalize = 1;
7694 }
7695
7696 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007697 * read the value
7698 */
7699 SKIP_BLANKS;
7700 if (RAW == '=') {
7701 NEXT;
7702 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007703 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704 ctxt->instate = XML_PARSER_CONTENT;
7705 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007706 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007707 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 return(NULL);
7709 }
7710
Daniel Veillardd8925572005-06-08 22:34:55 +00007711 if (*prefix == ctxt->str_xml) {
7712 /*
7713 * Check that xml:lang conforms to the specification
7714 * No more registered as an error, just generate a warning now
7715 * since this was deprecated in XML second edition
7716 */
7717 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7718 internal_val = xmlStrndup(val, *len);
7719 if (!xmlCheckLanguageID(internal_val)) {
7720 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7721 "Malformed value for xml:lang : %s\n",
7722 internal_val, NULL);
7723 }
7724 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725
Daniel Veillardd8925572005-06-08 22:34:55 +00007726 /*
7727 * Check that xml:space conforms to the specification
7728 */
7729 if (xmlStrEqual(name, BAD_CAST "space")) {
7730 internal_val = xmlStrndup(val, *len);
7731 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7732 *(ctxt->space) = 0;
7733 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7734 *(ctxt->space) = 1;
7735 else {
7736 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007738 internal_val, NULL);
7739 }
7740 }
7741 if (internal_val) {
7742 xmlFree(internal_val);
7743 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007744 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745
7746 *value = val;
7747 return(name);
7748}
7749
7750/**
7751 * xmlParseStartTag2:
7752 * @ctxt: an XML parser context
7753 *
7754 * parse a start of tag either for rule element or
7755 * EmptyElement. In both case we don't parse the tag closing chars.
7756 * This routine is called when running SAX2 parsing
7757 *
7758 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7759 *
7760 * [ WFC: Unique Att Spec ]
7761 * No attribute name may appear more than once in the same start-tag or
7762 * empty-element tag.
7763 *
7764 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7765 *
7766 * [ WFC: Unique Att Spec ]
7767 * No attribute name may appear more than once in the same start-tag or
7768 * empty-element tag.
7769 *
7770 * With namespace:
7771 *
7772 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7773 *
7774 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7775 *
7776 * Returns the element name parsed
7777 */
7778
7779static const xmlChar *
7780xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007781 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 const xmlChar *localname;
7783 const xmlChar *prefix;
7784 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007785 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007786 const xmlChar *nsname;
7787 xmlChar *attvalue;
7788 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007789 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007790 int nratts, nbatts, nbdef;
7791 int i, j, nbNs, attval;
7792 const xmlChar *base;
7793 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007794 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007795
7796 if (RAW != '<') return(NULL);
7797 NEXT1;
7798
7799 /*
7800 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7801 * point since the attribute values may be stored as pointers to
7802 * the buffer and calling SHRINK would destroy them !
7803 * The Shrinking is only possible once the full set of attribute
7804 * callbacks have been done.
7805 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007806reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007807 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007808 base = ctxt->input->base;
7809 cur = ctxt->input->cur - ctxt->input->base;
7810 nbatts = 0;
7811 nratts = 0;
7812 nbdef = 0;
7813 nbNs = 0;
7814 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007815 /* Forget any namespaces added during an earlier parse of this element. */
7816 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007817
7818 localname = xmlParseQName(ctxt, &prefix);
7819 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7821 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822 return(NULL);
7823 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007824 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007825
7826 /*
7827 * Now parse the attributes, it ends up with the ending
7828 *
7829 * (S Attribute)* S?
7830 */
7831 SKIP_BLANKS;
7832 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007833 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007834
7835 while ((RAW != '>') &&
7836 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007837 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007838 const xmlChar *q = CUR_PTR;
7839 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007840 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007841
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007842 attname = xmlParseAttribute2(ctxt, prefix, localname,
7843 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007844 if ((attname != NULL) && (attvalue != NULL)) {
7845 if (len < 0) len = xmlStrlen(attvalue);
7846 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007847 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7848 xmlURIPtr uri;
7849
7850 if (*URL != 0) {
7851 uri = xmlParseURI((const char *) URL);
7852 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007853 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7854 "xmlns: %s not a valid URI\n",
7855 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007856 } else {
7857 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007858 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7859 "xmlns: URI %s is not absolute\n",
7860 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007861 }
7862 xmlFreeURI(uri);
7863 }
7864 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007865 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007866 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007867 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007868 for (j = 1;j <= nbNs;j++)
7869 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7870 break;
7871 if (j <= nbNs)
7872 xmlErrAttributeDup(ctxt, NULL, attname);
7873 else
7874 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007875 if (alloc != 0) xmlFree(attvalue);
7876 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007877 continue;
7878 }
7879 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007880 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7881 xmlURIPtr uri;
7882
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007883 if (attname == ctxt->str_xml) {
7884 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007885 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7886 "xml namespace prefix mapped to wrong URI\n",
7887 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007888 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007889 /*
7890 * Do not keep a namespace definition node
7891 */
7892 if (alloc != 0) xmlFree(attvalue);
7893 SKIP_BLANKS;
7894 continue;
7895 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007896 uri = xmlParseURI((const char *) URL);
7897 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007898 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7899 "xmlns:%s: '%s' is not a valid URI\n",
7900 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007901 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007902 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007903 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7904 "xmlns:%s: URI %s is not absolute\n",
7905 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007906 }
7907 xmlFreeURI(uri);
7908 }
7909
Daniel Veillard0fb18932003-09-07 09:14:37 +00007910 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007911 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007912 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007913 for (j = 1;j <= nbNs;j++)
7914 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7915 break;
7916 if (j <= nbNs)
7917 xmlErrAttributeDup(ctxt, aprefix, attname);
7918 else
7919 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007920 if (alloc != 0) xmlFree(attvalue);
7921 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007922 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007923 continue;
7924 }
7925
7926 /*
7927 * Add the pair to atts
7928 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007929 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7930 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007931 if (attvalue[len] == 0)
7932 xmlFree(attvalue);
7933 goto failed;
7934 }
7935 maxatts = ctxt->maxatts;
7936 atts = ctxt->atts;
7937 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007938 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007939 atts[nbatts++] = attname;
7940 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007941 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007942 atts[nbatts++] = attvalue;
7943 attvalue += len;
7944 atts[nbatts++] = attvalue;
7945 /*
7946 * tag if some deallocation is needed
7947 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007948 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007949 } else {
7950 if ((attvalue != NULL) && (attvalue[len] == 0))
7951 xmlFree(attvalue);
7952 }
7953
7954failed:
7955
7956 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007957 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007958 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7959 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007960 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007961 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7962 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007963 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007964 }
7965 SKIP_BLANKS;
7966 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7967 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007968 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007969 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 break;
7971 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007972 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007973 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974 }
7975
Daniel Veillard0fb18932003-09-07 09:14:37 +00007976 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007977 * The attributes defaulting
7978 */
7979 if (ctxt->attsDefault != NULL) {
7980 xmlDefAttrsPtr defaults;
7981
7982 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7983 if (defaults != NULL) {
7984 for (i = 0;i < defaults->nbAttrs;i++) {
7985 attname = defaults->values[4 * i];
7986 aprefix = defaults->values[4 * i + 1];
7987
7988 /*
7989 * special work for namespaces defaulted defs
7990 */
7991 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7992 /*
7993 * check that it's not a defined namespace
7994 */
7995 for (j = 1;j <= nbNs;j++)
7996 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7997 break;
7998 if (j <= nbNs) continue;
7999
8000 nsname = xmlGetNamespace(ctxt, NULL);
8001 if (nsname != defaults->values[4 * i + 2]) {
8002 if (nsPush(ctxt, NULL,
8003 defaults->values[4 * i + 2]) > 0)
8004 nbNs++;
8005 }
8006 } else if (aprefix == ctxt->str_xmlns) {
8007 /*
8008 * check that it's not a defined namespace
8009 */
8010 for (j = 1;j <= nbNs;j++)
8011 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8012 break;
8013 if (j <= nbNs) continue;
8014
8015 nsname = xmlGetNamespace(ctxt, attname);
8016 if (nsname != defaults->values[2]) {
8017 if (nsPush(ctxt, attname,
8018 defaults->values[4 * i + 2]) > 0)
8019 nbNs++;
8020 }
8021 } else {
8022 /*
8023 * check that it's not a defined attribute
8024 */
8025 for (j = 0;j < nbatts;j+=5) {
8026 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8027 break;
8028 }
8029 if (j < nbatts) continue;
8030
8031 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8032 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008033 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008034 }
8035 maxatts = ctxt->maxatts;
8036 atts = ctxt->atts;
8037 }
8038 atts[nbatts++] = attname;
8039 atts[nbatts++] = aprefix;
8040 if (aprefix == NULL)
8041 atts[nbatts++] = NULL;
8042 else
8043 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8044 atts[nbatts++] = defaults->values[4 * i + 2];
8045 atts[nbatts++] = defaults->values[4 * i + 3];
8046 nbdef++;
8047 }
8048 }
8049 }
8050 }
8051
Daniel Veillarde70c8772003-11-25 07:21:18 +00008052 /*
8053 * The attributes checkings
8054 */
8055 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008056 /*
8057 * The default namespace does not apply to attribute names.
8058 */
8059 if (atts[i + 1] != NULL) {
8060 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8061 if (nsname == NULL) {
8062 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8063 "Namespace prefix %s for %s on %s is not defined\n",
8064 atts[i + 1], atts[i], localname);
8065 }
8066 atts[i + 2] = nsname;
8067 } else
8068 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008069 /*
8070 * [ WFC: Unique Att Spec ]
8071 * No attribute name may appear more than once in the same
8072 * start-tag or empty-element tag.
8073 * As extended by the Namespace in XML REC.
8074 */
8075 for (j = 0; j < i;j += 5) {
8076 if (atts[i] == atts[j]) {
8077 if (atts[i+1] == atts[j+1]) {
8078 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8079 break;
8080 }
8081 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8082 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8083 "Namespaced Attribute %s in '%s' redefined\n",
8084 atts[i], nsname, NULL);
8085 break;
8086 }
8087 }
8088 }
8089 }
8090
Daniel Veillarde57ec792003-09-10 10:50:59 +00008091 nsname = xmlGetNamespace(ctxt, prefix);
8092 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008093 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8094 "Namespace prefix %s on %s is not defined\n",
8095 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008096 }
8097 *pref = prefix;
8098 *URI = nsname;
8099
8100 /*
8101 * SAX: Start of Element !
8102 */
8103 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8104 (!ctxt->disableSAX)) {
8105 if (nbNs > 0)
8106 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8107 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8108 nbatts / 5, nbdef, atts);
8109 else
8110 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8111 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8112 }
8113
8114 /*
8115 * Free up attribute allocated strings if needed
8116 */
8117 if (attval != 0) {
8118 for (i = 3,j = 0; j < nratts;i += 5,j++)
8119 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8120 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008121 }
8122
8123 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008124
8125base_changed:
8126 /*
8127 * the attribute strings are valid iif the base didn't changed
8128 */
8129 if (attval != 0) {
8130 for (i = 3,j = 0; j < nratts;i += 5,j++)
8131 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8132 xmlFree((xmlChar *) atts[i]);
8133 }
8134 ctxt->input->cur = ctxt->input->base + cur;
8135 if (ctxt->wellFormed == 1) {
8136 goto reparse;
8137 }
8138 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008139}
8140
8141/**
8142 * xmlParseEndTag2:
8143 * @ctxt: an XML parser context
8144 * @line: line of the start tag
8145 * @nsNr: number of namespaces on the start tag
8146 *
8147 * parse an end of tag
8148 *
8149 * [42] ETag ::= '</' Name S? '>'
8150 *
8151 * With namespace
8152 *
8153 * [NS 9] ETag ::= '</' QName S? '>'
8154 */
8155
8156static void
8157xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008158 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008159 const xmlChar *name;
8160
8161 GROW;
8162 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008163 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008164 return;
8165 }
8166 SKIP(2);
8167
William M. Brack13dfa872004-09-18 04:52:08 +00008168 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008169 if (ctxt->input->cur[tlen] == '>') {
8170 ctxt->input->cur += tlen + 1;
8171 goto done;
8172 }
8173 ctxt->input->cur += tlen;
8174 name = (xmlChar*)1;
8175 } else {
8176 if (prefix == NULL)
8177 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8178 else
8179 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8180 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181
8182 /*
8183 * We should definitely be at the ending "S? '>'" part
8184 */
8185 GROW;
8186 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008187 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008188 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008189 } else
8190 NEXT1;
8191
8192 /*
8193 * [ WFC: Element Type Match ]
8194 * The Name in an element's end-tag must match the element type in the
8195 * start-tag.
8196 *
8197 */
8198 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008199 if (name == NULL) name = BAD_CAST "unparseable";
8200 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008202 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008203 }
8204
8205 /*
8206 * SAX: End of Tag
8207 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008208done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8210 (!ctxt->disableSAX))
8211 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8212
Daniel Veillard0fb18932003-09-07 09:14:37 +00008213 spacePop(ctxt);
8214 if (nsNr != 0)
8215 nsPop(ctxt, nsNr);
8216 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008217}
8218
8219/**
Owen Taylor3473f882001-02-23 17:55:21 +00008220 * xmlParseCDSect:
8221 * @ctxt: an XML parser context
8222 *
8223 * Parse escaped pure raw content.
8224 *
8225 * [18] CDSect ::= CDStart CData CDEnd
8226 *
8227 * [19] CDStart ::= '<![CDATA['
8228 *
8229 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8230 *
8231 * [21] CDEnd ::= ']]>'
8232 */
8233void
8234xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8235 xmlChar *buf = NULL;
8236 int len = 0;
8237 int size = XML_PARSER_BUFFER_SIZE;
8238 int r, rl;
8239 int s, sl;
8240 int cur, l;
8241 int count = 0;
8242
Daniel Veillard8f597c32003-10-06 08:19:27 +00008243 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008244 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008245 SKIP(9);
8246 } else
8247 return;
8248
8249 ctxt->instate = XML_PARSER_CDATA_SECTION;
8250 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008251 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008252 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008253 ctxt->instate = XML_PARSER_CONTENT;
8254 return;
8255 }
8256 NEXTL(rl);
8257 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008258 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008259 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 ctxt->instate = XML_PARSER_CONTENT;
8261 return;
8262 }
8263 NEXTL(sl);
8264 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008265 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008266 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008267 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008268 return;
8269 }
William M. Brack871611b2003-10-18 04:53:14 +00008270 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008271 ((r != ']') || (s != ']') || (cur != '>'))) {
8272 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008273 xmlChar *tmp;
8274
Owen Taylor3473f882001-02-23 17:55:21 +00008275 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008276 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8277 if (tmp == NULL) {
8278 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008279 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008280 return;
8281 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008282 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008283 }
8284 COPY_BUF(rl,buf,len,r);
8285 r = s;
8286 rl = sl;
8287 s = cur;
8288 sl = l;
8289 count++;
8290 if (count > 50) {
8291 GROW;
8292 count = 0;
8293 }
8294 NEXTL(l);
8295 cur = CUR_CHAR(l);
8296 }
8297 buf[len] = 0;
8298 ctxt->instate = XML_PARSER_CONTENT;
8299 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008300 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008301 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008302 xmlFree(buf);
8303 return;
8304 }
8305 NEXTL(l);
8306
8307 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008308 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008309 */
8310 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8311 if (ctxt->sax->cdataBlock != NULL)
8312 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008313 else if (ctxt->sax->characters != NULL)
8314 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008315 }
8316 xmlFree(buf);
8317}
8318
8319/**
8320 * xmlParseContent:
8321 * @ctxt: an XML parser context
8322 *
8323 * Parse a content:
8324 *
8325 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8326 */
8327
8328void
8329xmlParseContent(xmlParserCtxtPtr ctxt) {
8330 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008331 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008332 ((RAW != '<') || (NXT(1) != '/'))) {
8333 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008334 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008335 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008336
8337 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008338 * First case : a Processing Instruction.
8339 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008340 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008341 xmlParsePI(ctxt);
8342 }
8343
8344 /*
8345 * Second case : a CDSection
8346 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008347 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008348 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008349 xmlParseCDSect(ctxt);
8350 }
8351
8352 /*
8353 * Third case : a comment
8354 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008355 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008356 (NXT(2) == '-') && (NXT(3) == '-')) {
8357 xmlParseComment(ctxt);
8358 ctxt->instate = XML_PARSER_CONTENT;
8359 }
8360
8361 /*
8362 * Fourth case : a sub-element.
8363 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008364 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008365 xmlParseElement(ctxt);
8366 }
8367
8368 /*
8369 * Fifth case : a reference. If if has not been resolved,
8370 * parsing returns it's Name, create the node
8371 */
8372
Daniel Veillard21a0f912001-02-25 19:54:14 +00008373 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008374 xmlParseReference(ctxt);
8375 }
8376
8377 /*
8378 * Last case, text. Note that References are handled directly.
8379 */
8380 else {
8381 xmlParseCharData(ctxt, 0);
8382 }
8383
8384 GROW;
8385 /*
8386 * Pop-up of finished entities.
8387 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008388 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008389 xmlPopInput(ctxt);
8390 SHRINK;
8391
Daniel Veillardfdc91562002-07-01 21:52:03 +00008392 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008393 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8394 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008395 ctxt->instate = XML_PARSER_EOF;
8396 break;
8397 }
8398 }
8399}
8400
8401/**
8402 * xmlParseElement:
8403 * @ctxt: an XML parser context
8404 *
8405 * parse an XML element, this is highly recursive
8406 *
8407 * [39] element ::= EmptyElemTag | STag content ETag
8408 *
8409 * [ WFC: Element Type Match ]
8410 * The Name in an element's end-tag must match the element type in the
8411 * start-tag.
8412 *
Owen Taylor3473f882001-02-23 17:55:21 +00008413 */
8414
8415void
8416xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008417 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008418 const xmlChar *prefix;
8419 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008420 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008421 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008422 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008423 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008424
8425 /* Capture start position */
8426 if (ctxt->record_info) {
8427 node_info.begin_pos = ctxt->input->consumed +
8428 (CUR_PTR - ctxt->input->base);
8429 node_info.begin_line = ctxt->input->line;
8430 }
8431
8432 if (ctxt->spaceNr == 0)
8433 spacePush(ctxt, -1);
8434 else
8435 spacePush(ctxt, *ctxt->space);
8436
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008437 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008438#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008439 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008440#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008441 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008442#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008443 else
8444 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008445#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008446 if (name == NULL) {
8447 spacePop(ctxt);
8448 return;
8449 }
8450 namePush(ctxt, name);
8451 ret = ctxt->node;
8452
Daniel Veillard4432df22003-09-28 18:58:27 +00008453#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008454 /*
8455 * [ VC: Root Element Type ]
8456 * The Name in the document type declaration must match the element
8457 * type of the root element.
8458 */
8459 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8460 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8461 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008462#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008463
8464 /*
8465 * Check for an Empty Element.
8466 */
8467 if ((RAW == '/') && (NXT(1) == '>')) {
8468 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008469 if (ctxt->sax2) {
8470 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8471 (!ctxt->disableSAX))
8472 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008473#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008474 } else {
8475 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8476 (!ctxt->disableSAX))
8477 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008478#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008479 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008480 namePop(ctxt);
8481 spacePop(ctxt);
8482 if (nsNr != ctxt->nsNr)
8483 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008484 if ( ret != NULL && ctxt->record_info ) {
8485 node_info.end_pos = ctxt->input->consumed +
8486 (CUR_PTR - ctxt->input->base);
8487 node_info.end_line = ctxt->input->line;
8488 node_info.node = ret;
8489 xmlParserAddNodeInfo(ctxt, &node_info);
8490 }
8491 return;
8492 }
8493 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008494 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008495 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008496 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8497 "Couldn't find end of Start Tag %s line %d\n",
8498 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008499
8500 /*
8501 * end of parsing of this node.
8502 */
8503 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008504 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008505 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008506 if (nsNr != ctxt->nsNr)
8507 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008508
8509 /*
8510 * Capture end position and add node
8511 */
8512 if ( ret != NULL && ctxt->record_info ) {
8513 node_info.end_pos = ctxt->input->consumed +
8514 (CUR_PTR - ctxt->input->base);
8515 node_info.end_line = ctxt->input->line;
8516 node_info.node = ret;
8517 xmlParserAddNodeInfo(ctxt, &node_info);
8518 }
8519 return;
8520 }
8521
8522 /*
8523 * Parse the content of the element:
8524 */
8525 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008526 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008527 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008528 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008529 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008530
8531 /*
8532 * end of parsing of this node.
8533 */
8534 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008535 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008536 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008537 if (nsNr != ctxt->nsNr)
8538 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008539 return;
8540 }
8541
8542 /*
8543 * parse the end of tag: '</' should be here.
8544 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008545 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008546 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008547 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008548 }
8549#ifdef LIBXML_SAX1_ENABLED
8550 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008551 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008552#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008553
8554 /*
8555 * Capture end position and add node
8556 */
8557 if ( ret != NULL && ctxt->record_info ) {
8558 node_info.end_pos = ctxt->input->consumed +
8559 (CUR_PTR - ctxt->input->base);
8560 node_info.end_line = ctxt->input->line;
8561 node_info.node = ret;
8562 xmlParserAddNodeInfo(ctxt, &node_info);
8563 }
8564}
8565
8566/**
8567 * xmlParseVersionNum:
8568 * @ctxt: an XML parser context
8569 *
8570 * parse the XML version value.
8571 *
8572 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8573 *
8574 * Returns the string giving the XML version number, or NULL
8575 */
8576xmlChar *
8577xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8578 xmlChar *buf = NULL;
8579 int len = 0;
8580 int size = 10;
8581 xmlChar cur;
8582
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008583 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008584 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008585 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008586 return(NULL);
8587 }
8588 cur = CUR;
8589 while (((cur >= 'a') && (cur <= 'z')) ||
8590 ((cur >= 'A') && (cur <= 'Z')) ||
8591 ((cur >= '0') && (cur <= '9')) ||
8592 (cur == '_') || (cur == '.') ||
8593 (cur == ':') || (cur == '-')) {
8594 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008595 xmlChar *tmp;
8596
Owen Taylor3473f882001-02-23 17:55:21 +00008597 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008598 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8599 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008600 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008601 return(NULL);
8602 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008603 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008604 }
8605 buf[len++] = cur;
8606 NEXT;
8607 cur=CUR;
8608 }
8609 buf[len] = 0;
8610 return(buf);
8611}
8612
8613/**
8614 * xmlParseVersionInfo:
8615 * @ctxt: an XML parser context
8616 *
8617 * parse the XML version.
8618 *
8619 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8620 *
8621 * [25] Eq ::= S? '=' S?
8622 *
8623 * Returns the version string, e.g. "1.0"
8624 */
8625
8626xmlChar *
8627xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8628 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008629
Daniel Veillarda07050d2003-10-19 14:46:32 +00008630 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008631 SKIP(7);
8632 SKIP_BLANKS;
8633 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008634 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008635 return(NULL);
8636 }
8637 NEXT;
8638 SKIP_BLANKS;
8639 if (RAW == '"') {
8640 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008641 version = xmlParseVersionNum(ctxt);
8642 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008643 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008644 } else
8645 NEXT;
8646 } else if (RAW == '\''){
8647 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008648 version = xmlParseVersionNum(ctxt);
8649 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008650 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008651 } else
8652 NEXT;
8653 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008654 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 }
8656 }
8657 return(version);
8658}
8659
8660/**
8661 * xmlParseEncName:
8662 * @ctxt: an XML parser context
8663 *
8664 * parse the XML encoding name
8665 *
8666 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8667 *
8668 * Returns the encoding name value or NULL
8669 */
8670xmlChar *
8671xmlParseEncName(xmlParserCtxtPtr ctxt) {
8672 xmlChar *buf = NULL;
8673 int len = 0;
8674 int size = 10;
8675 xmlChar cur;
8676
8677 cur = CUR;
8678 if (((cur >= 'a') && (cur <= 'z')) ||
8679 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008680 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008681 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008682 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008683 return(NULL);
8684 }
8685
8686 buf[len++] = cur;
8687 NEXT;
8688 cur = CUR;
8689 while (((cur >= 'a') && (cur <= 'z')) ||
8690 ((cur >= 'A') && (cur <= 'Z')) ||
8691 ((cur >= '0') && (cur <= '9')) ||
8692 (cur == '.') || (cur == '_') ||
8693 (cur == '-')) {
8694 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008695 xmlChar *tmp;
8696
Owen Taylor3473f882001-02-23 17:55:21 +00008697 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008698 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8699 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008700 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008701 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008702 return(NULL);
8703 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008704 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008705 }
8706 buf[len++] = cur;
8707 NEXT;
8708 cur = CUR;
8709 if (cur == 0) {
8710 SHRINK;
8711 GROW;
8712 cur = CUR;
8713 }
8714 }
8715 buf[len] = 0;
8716 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008717 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008718 }
8719 return(buf);
8720}
8721
8722/**
8723 * xmlParseEncodingDecl:
8724 * @ctxt: an XML parser context
8725 *
8726 * parse the XML encoding declaration
8727 *
8728 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8729 *
8730 * this setups the conversion filters.
8731 *
8732 * Returns the encoding value or NULL
8733 */
8734
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008735const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008736xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8737 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008738
8739 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008740 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008741 SKIP(8);
8742 SKIP_BLANKS;
8743 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008744 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008745 return(NULL);
8746 }
8747 NEXT;
8748 SKIP_BLANKS;
8749 if (RAW == '"') {
8750 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008751 encoding = xmlParseEncName(ctxt);
8752 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008753 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008754 } else
8755 NEXT;
8756 } else if (RAW == '\''){
8757 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008758 encoding = xmlParseEncName(ctxt);
8759 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008760 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008761 } else
8762 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008763 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008764 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008765 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008766 /*
8767 * UTF-16 encoding stwich has already taken place at this stage,
8768 * more over the little-endian/big-endian selection is already done
8769 */
8770 if ((encoding != NULL) &&
8771 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8772 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008773 if (ctxt->encoding != NULL)
8774 xmlFree((xmlChar *) ctxt->encoding);
8775 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008776 }
8777 /*
8778 * UTF-8 encoding is handled natively
8779 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008780 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008781 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8782 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008783 if (ctxt->encoding != NULL)
8784 xmlFree((xmlChar *) ctxt->encoding);
8785 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008786 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008787 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008788 xmlCharEncodingHandlerPtr handler;
8789
8790 if (ctxt->input->encoding != NULL)
8791 xmlFree((xmlChar *) ctxt->input->encoding);
8792 ctxt->input->encoding = encoding;
8793
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008794 handler = xmlFindCharEncodingHandler((const char *) encoding);
8795 if (handler != NULL) {
8796 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008797 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008798 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008799 "Unsupported encoding %s\n", encoding);
8800 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008801 }
8802 }
8803 }
8804 return(encoding);
8805}
8806
8807/**
8808 * xmlParseSDDecl:
8809 * @ctxt: an XML parser context
8810 *
8811 * parse the XML standalone declaration
8812 *
8813 * [32] SDDecl ::= S 'standalone' Eq
8814 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8815 *
8816 * [ VC: Standalone Document Declaration ]
8817 * TODO The standalone document declaration must have the value "no"
8818 * if any external markup declarations contain declarations of:
8819 * - attributes with default values, if elements to which these
8820 * attributes apply appear in the document without specifications
8821 * of values for these attributes, or
8822 * - entities (other than amp, lt, gt, apos, quot), if references
8823 * to those entities appear in the document, or
8824 * - attributes with values subject to normalization, where the
8825 * attribute appears in the document with a value which will change
8826 * as a result of normalization, or
8827 * - element types with element content, if white space occurs directly
8828 * within any instance of those types.
8829 *
8830 * Returns 1 if standalone, 0 otherwise
8831 */
8832
8833int
8834xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8835 int standalone = -1;
8836
8837 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008838 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008839 SKIP(10);
8840 SKIP_BLANKS;
8841 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008842 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008843 return(standalone);
8844 }
8845 NEXT;
8846 SKIP_BLANKS;
8847 if (RAW == '\''){
8848 NEXT;
8849 if ((RAW == 'n') && (NXT(1) == 'o')) {
8850 standalone = 0;
8851 SKIP(2);
8852 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8853 (NXT(2) == 's')) {
8854 standalone = 1;
8855 SKIP(3);
8856 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008857 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008858 }
8859 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008860 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008861 } else
8862 NEXT;
8863 } else if (RAW == '"'){
8864 NEXT;
8865 if ((RAW == 'n') && (NXT(1) == 'o')) {
8866 standalone = 0;
8867 SKIP(2);
8868 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8869 (NXT(2) == 's')) {
8870 standalone = 1;
8871 SKIP(3);
8872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008873 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008874 }
8875 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008876 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008877 } else
8878 NEXT;
8879 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008880 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008881 }
8882 }
8883 return(standalone);
8884}
8885
8886/**
8887 * xmlParseXMLDecl:
8888 * @ctxt: an XML parser context
8889 *
8890 * parse an XML declaration header
8891 *
8892 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8893 */
8894
8895void
8896xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8897 xmlChar *version;
8898
8899 /*
8900 * We know that '<?xml' is here.
8901 */
8902 SKIP(5);
8903
William M. Brack76e95df2003-10-18 16:20:14 +00008904 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008905 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8906 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008907 }
8908 SKIP_BLANKS;
8909
8910 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008911 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008912 */
8913 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008914 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008915 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008916 } else {
8917 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8918 /*
8919 * TODO: Blueberry should be detected here
8920 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008921 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8922 "Unsupported version '%s'\n",
8923 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008924 }
8925 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008926 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008927 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008928 }
Owen Taylor3473f882001-02-23 17:55:21 +00008929
8930 /*
8931 * We may have the encoding declaration
8932 */
William M. Brack76e95df2003-10-18 16:20:14 +00008933 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008934 if ((RAW == '?') && (NXT(1) == '>')) {
8935 SKIP(2);
8936 return;
8937 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008939 }
8940 xmlParseEncodingDecl(ctxt);
8941 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8942 /*
8943 * The XML REC instructs us to stop parsing right here
8944 */
8945 return;
8946 }
8947
8948 /*
8949 * We may have the standalone status.
8950 */
William M. Brack76e95df2003-10-18 16:20:14 +00008951 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008952 if ((RAW == '?') && (NXT(1) == '>')) {
8953 SKIP(2);
8954 return;
8955 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008956 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008957 }
8958 SKIP_BLANKS;
8959 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8960
8961 SKIP_BLANKS;
8962 if ((RAW == '?') && (NXT(1) == '>')) {
8963 SKIP(2);
8964 } else if (RAW == '>') {
8965 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008966 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008967 NEXT;
8968 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008969 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008970 MOVETO_ENDTAG(CUR_PTR);
8971 NEXT;
8972 }
8973}
8974
8975/**
8976 * xmlParseMisc:
8977 * @ctxt: an XML parser context
8978 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008979 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008980 *
8981 * [27] Misc ::= Comment | PI | S
8982 */
8983
8984void
8985xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008986 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008987 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008988 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008989 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008990 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008991 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008992 NEXT;
8993 } else
8994 xmlParseComment(ctxt);
8995 }
8996}
8997
8998/**
8999 * xmlParseDocument:
9000 * @ctxt: an XML parser context
9001 *
9002 * parse an XML document (and build a tree if using the standard SAX
9003 * interface).
9004 *
9005 * [1] document ::= prolog element Misc*
9006 *
9007 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9008 *
9009 * Returns 0, -1 in case of error. the parser context is augmented
9010 * as a result of the parsing.
9011 */
9012
9013int
9014xmlParseDocument(xmlParserCtxtPtr ctxt) {
9015 xmlChar start[4];
9016 xmlCharEncoding enc;
9017
9018 xmlInitParser();
9019
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009020 if ((ctxt == NULL) || (ctxt->input == NULL))
9021 return(-1);
9022
Owen Taylor3473f882001-02-23 17:55:21 +00009023 GROW;
9024
9025 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009026 * SAX: detecting the level.
9027 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009028 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009029
9030 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009031 * SAX: beginning of the document processing.
9032 */
9033 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9034 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9035
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009036 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9037 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009038 /*
9039 * Get the 4 first bytes and decode the charset
9040 * if enc != XML_CHAR_ENCODING_NONE
9041 * plug some encoding conversion routines.
9042 */
9043 start[0] = RAW;
9044 start[1] = NXT(1);
9045 start[2] = NXT(2);
9046 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009047 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009048 if (enc != XML_CHAR_ENCODING_NONE) {
9049 xmlSwitchEncoding(ctxt, enc);
9050 }
Owen Taylor3473f882001-02-23 17:55:21 +00009051 }
9052
9053
9054 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009055 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009056 }
9057
9058 /*
9059 * Check for the XMLDecl in the Prolog.
9060 */
9061 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009062 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009063
9064 /*
9065 * Note that we will switch encoding on the fly.
9066 */
9067 xmlParseXMLDecl(ctxt);
9068 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9069 /*
9070 * The XML REC instructs us to stop parsing right here
9071 */
9072 return(-1);
9073 }
9074 ctxt->standalone = ctxt->input->standalone;
9075 SKIP_BLANKS;
9076 } else {
9077 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9078 }
9079 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9080 ctxt->sax->startDocument(ctxt->userData);
9081
9082 /*
9083 * The Misc part of the Prolog
9084 */
9085 GROW;
9086 xmlParseMisc(ctxt);
9087
9088 /*
9089 * Then possibly doc type declaration(s) and more Misc
9090 * (doctypedecl Misc*)?
9091 */
9092 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009093 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009094
9095 ctxt->inSubset = 1;
9096 xmlParseDocTypeDecl(ctxt);
9097 if (RAW == '[') {
9098 ctxt->instate = XML_PARSER_DTD;
9099 xmlParseInternalSubset(ctxt);
9100 }
9101
9102 /*
9103 * Create and update the external subset.
9104 */
9105 ctxt->inSubset = 2;
9106 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9107 (!ctxt->disableSAX))
9108 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9109 ctxt->extSubSystem, ctxt->extSubURI);
9110 ctxt->inSubset = 0;
9111
9112
9113 ctxt->instate = XML_PARSER_PROLOG;
9114 xmlParseMisc(ctxt);
9115 }
9116
9117 /*
9118 * Time to start parsing the tree itself
9119 */
9120 GROW;
9121 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009122 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9123 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009124 } else {
9125 ctxt->instate = XML_PARSER_CONTENT;
9126 xmlParseElement(ctxt);
9127 ctxt->instate = XML_PARSER_EPILOG;
9128
9129
9130 /*
9131 * The Misc part at the end
9132 */
9133 xmlParseMisc(ctxt);
9134
Daniel Veillard561b7f82002-03-20 21:55:57 +00009135 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009136 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009137 }
9138 ctxt->instate = XML_PARSER_EOF;
9139 }
9140
9141 /*
9142 * SAX: end of the document processing.
9143 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009144 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009145 ctxt->sax->endDocument(ctxt->userData);
9146
Daniel Veillard5997aca2002-03-18 18:36:20 +00009147 /*
9148 * Remove locally kept entity definitions if the tree was not built
9149 */
9150 if ((ctxt->myDoc != NULL) &&
9151 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9152 xmlFreeDoc(ctxt->myDoc);
9153 ctxt->myDoc = NULL;
9154 }
9155
Daniel Veillardc7612992002-02-17 22:47:37 +00009156 if (! ctxt->wellFormed) {
9157 ctxt->valid = 0;
9158 return(-1);
9159 }
Owen Taylor3473f882001-02-23 17:55:21 +00009160 return(0);
9161}
9162
9163/**
9164 * xmlParseExtParsedEnt:
9165 * @ctxt: an XML parser context
9166 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009167 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009168 * An external general parsed entity is well-formed if it matches the
9169 * production labeled extParsedEnt.
9170 *
9171 * [78] extParsedEnt ::= TextDecl? content
9172 *
9173 * Returns 0, -1 in case of error. the parser context is augmented
9174 * as a result of the parsing.
9175 */
9176
9177int
9178xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9179 xmlChar start[4];
9180 xmlCharEncoding enc;
9181
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009182 if ((ctxt == NULL) || (ctxt->input == NULL))
9183 return(-1);
9184
Owen Taylor3473f882001-02-23 17:55:21 +00009185 xmlDefaultSAXHandlerInit();
9186
Daniel Veillard309f81d2003-09-23 09:02:53 +00009187 xmlDetectSAX2(ctxt);
9188
Owen Taylor3473f882001-02-23 17:55:21 +00009189 GROW;
9190
9191 /*
9192 * SAX: beginning of the document processing.
9193 */
9194 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9195 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9196
9197 /*
9198 * Get the 4 first bytes and decode the charset
9199 * if enc != XML_CHAR_ENCODING_NONE
9200 * plug some encoding conversion routines.
9201 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009202 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9203 start[0] = RAW;
9204 start[1] = NXT(1);
9205 start[2] = NXT(2);
9206 start[3] = NXT(3);
9207 enc = xmlDetectCharEncoding(start, 4);
9208 if (enc != XML_CHAR_ENCODING_NONE) {
9209 xmlSwitchEncoding(ctxt, enc);
9210 }
Owen Taylor3473f882001-02-23 17:55:21 +00009211 }
9212
9213
9214 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009215 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009216 }
9217
9218 /*
9219 * Check for the XMLDecl in the Prolog.
9220 */
9221 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009222 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009223
9224 /*
9225 * Note that we will switch encoding on the fly.
9226 */
9227 xmlParseXMLDecl(ctxt);
9228 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9229 /*
9230 * The XML REC instructs us to stop parsing right here
9231 */
9232 return(-1);
9233 }
9234 SKIP_BLANKS;
9235 } else {
9236 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9237 }
9238 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9239 ctxt->sax->startDocument(ctxt->userData);
9240
9241 /*
9242 * Doing validity checking on chunk doesn't make sense
9243 */
9244 ctxt->instate = XML_PARSER_CONTENT;
9245 ctxt->validate = 0;
9246 ctxt->loadsubset = 0;
9247 ctxt->depth = 0;
9248
9249 xmlParseContent(ctxt);
9250
9251 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009252 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009253 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009254 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009255 }
9256
9257 /*
9258 * SAX: end of the document processing.
9259 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009260 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009261 ctxt->sax->endDocument(ctxt->userData);
9262
9263 if (! ctxt->wellFormed) return(-1);
9264 return(0);
9265}
9266
Daniel Veillard73b013f2003-09-30 12:36:01 +00009267#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009268/************************************************************************
9269 * *
9270 * Progressive parsing interfaces *
9271 * *
9272 ************************************************************************/
9273
9274/**
9275 * xmlParseLookupSequence:
9276 * @ctxt: an XML parser context
9277 * @first: the first char to lookup
9278 * @next: the next char to lookup or zero
9279 * @third: the next char to lookup or zero
9280 *
9281 * Try to find if a sequence (first, next, third) or just (first next) or
9282 * (first) is available in the input stream.
9283 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9284 * to avoid rescanning sequences of bytes, it DOES change the state of the
9285 * parser, do not use liberally.
9286 *
9287 * Returns the index to the current parsing point if the full sequence
9288 * is available, -1 otherwise.
9289 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009290static int
Owen Taylor3473f882001-02-23 17:55:21 +00009291xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9292 xmlChar next, xmlChar third) {
9293 int base, len;
9294 xmlParserInputPtr in;
9295 const xmlChar *buf;
9296
9297 in = ctxt->input;
9298 if (in == NULL) return(-1);
9299 base = in->cur - in->base;
9300 if (base < 0) return(-1);
9301 if (ctxt->checkIndex > base)
9302 base = ctxt->checkIndex;
9303 if (in->buf == NULL) {
9304 buf = in->base;
9305 len = in->length;
9306 } else {
9307 buf = in->buf->buffer->content;
9308 len = in->buf->buffer->use;
9309 }
9310 /* take into account the sequence length */
9311 if (third) len -= 2;
9312 else if (next) len --;
9313 for (;base < len;base++) {
9314 if (buf[base] == first) {
9315 if (third != 0) {
9316 if ((buf[base + 1] != next) ||
9317 (buf[base + 2] != third)) continue;
9318 } else if (next != 0) {
9319 if (buf[base + 1] != next) continue;
9320 }
9321 ctxt->checkIndex = 0;
9322#ifdef DEBUG_PUSH
9323 if (next == 0)
9324 xmlGenericError(xmlGenericErrorContext,
9325 "PP: lookup '%c' found at %d\n",
9326 first, base);
9327 else if (third == 0)
9328 xmlGenericError(xmlGenericErrorContext,
9329 "PP: lookup '%c%c' found at %d\n",
9330 first, next, base);
9331 else
9332 xmlGenericError(xmlGenericErrorContext,
9333 "PP: lookup '%c%c%c' found at %d\n",
9334 first, next, third, base);
9335#endif
9336 return(base - (in->cur - in->base));
9337 }
9338 }
9339 ctxt->checkIndex = base;
9340#ifdef DEBUG_PUSH
9341 if (next == 0)
9342 xmlGenericError(xmlGenericErrorContext,
9343 "PP: lookup '%c' failed\n", first);
9344 else if (third == 0)
9345 xmlGenericError(xmlGenericErrorContext,
9346 "PP: lookup '%c%c' failed\n", first, next);
9347 else
9348 xmlGenericError(xmlGenericErrorContext,
9349 "PP: lookup '%c%c%c' failed\n", first, next, third);
9350#endif
9351 return(-1);
9352}
9353
9354/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009355 * xmlParseGetLasts:
9356 * @ctxt: an XML parser context
9357 * @lastlt: pointer to store the last '<' from the input
9358 * @lastgt: pointer to store the last '>' from the input
9359 *
9360 * Lookup the last < and > in the current chunk
9361 */
9362static void
9363xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9364 const xmlChar **lastgt) {
9365 const xmlChar *tmp;
9366
9367 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9368 xmlGenericError(xmlGenericErrorContext,
9369 "Internal error: xmlParseGetLasts\n");
9370 return;
9371 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009372 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009373 tmp = ctxt->input->end;
9374 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009375 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009376 if (tmp < ctxt->input->base) {
9377 *lastlt = NULL;
9378 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009379 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009380 *lastlt = tmp;
9381 tmp++;
9382 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9383 if (*tmp == '\'') {
9384 tmp++;
9385 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9386 if (tmp < ctxt->input->end) tmp++;
9387 } else if (*tmp == '"') {
9388 tmp++;
9389 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9390 if (tmp < ctxt->input->end) tmp++;
9391 } else
9392 tmp++;
9393 }
9394 if (tmp < ctxt->input->end)
9395 *lastgt = tmp;
9396 else {
9397 tmp = *lastlt;
9398 tmp--;
9399 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9400 if (tmp >= ctxt->input->base)
9401 *lastgt = tmp;
9402 else
9403 *lastgt = NULL;
9404 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009405 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 } else {
9407 *lastlt = NULL;
9408 *lastgt = NULL;
9409 }
9410}
9411/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009412 * xmlCheckCdataPush:
9413 * @cur: pointer to the bock of characters
9414 * @len: length of the block in bytes
9415 *
9416 * Check that the block of characters is okay as SCdata content [20]
9417 *
9418 * Returns the number of bytes to pass if okay, a negative index where an
9419 * UTF-8 error occured otherwise
9420 */
9421static int
9422xmlCheckCdataPush(const xmlChar *utf, int len) {
9423 int ix;
9424 unsigned char c;
9425 int codepoint;
9426
9427 if ((utf == NULL) || (len <= 0))
9428 return(0);
9429
9430 for (ix = 0; ix < len;) { /* string is 0-terminated */
9431 c = utf[ix];
9432 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9433 if (c >= 0x20)
9434 ix++;
9435 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9436 ix++;
9437 else
9438 return(-ix);
9439 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9440 if (ix + 2 > len) return(ix);
9441 if ((utf[ix+1] & 0xc0 ) != 0x80)
9442 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009443 codepoint = (utf[ix] & 0x1f) << 6;
9444 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009445 if (!xmlIsCharQ(codepoint))
9446 return(-ix);
9447 ix += 2;
9448 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9449 if (ix + 3 > len) return(ix);
9450 if (((utf[ix+1] & 0xc0) != 0x80) ||
9451 ((utf[ix+2] & 0xc0) != 0x80))
9452 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009453 codepoint = (utf[ix] & 0xf) << 12;
9454 codepoint |= (utf[ix+1] & 0x3f) << 6;
9455 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009456 if (!xmlIsCharQ(codepoint))
9457 return(-ix);
9458 ix += 3;
9459 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9460 if (ix + 4 > len) return(ix);
9461 if (((utf[ix+1] & 0xc0) != 0x80) ||
9462 ((utf[ix+2] & 0xc0) != 0x80) ||
9463 ((utf[ix+3] & 0xc0) != 0x80))
9464 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009465 codepoint = (utf[ix] & 0x7) << 18;
9466 codepoint |= (utf[ix+1] & 0x3f) << 12;
9467 codepoint |= (utf[ix+2] & 0x3f) << 6;
9468 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009469 if (!xmlIsCharQ(codepoint))
9470 return(-ix);
9471 ix += 4;
9472 } else /* unknown encoding */
9473 return(-ix);
9474 }
9475 return(ix);
9476}
9477
9478/**
Owen Taylor3473f882001-02-23 17:55:21 +00009479 * xmlParseTryOrFinish:
9480 * @ctxt: an XML parser context
9481 * @terminate: last chunk indicator
9482 *
9483 * Try to progress on parsing
9484 *
9485 * Returns zero if no parsing was possible
9486 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009487static int
Owen Taylor3473f882001-02-23 17:55:21 +00009488xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9489 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009490 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009491 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009492 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009493
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009494 if (ctxt->input == NULL)
9495 return(0);
9496
Owen Taylor3473f882001-02-23 17:55:21 +00009497#ifdef DEBUG_PUSH
9498 switch (ctxt->instate) {
9499 case XML_PARSER_EOF:
9500 xmlGenericError(xmlGenericErrorContext,
9501 "PP: try EOF\n"); break;
9502 case XML_PARSER_START:
9503 xmlGenericError(xmlGenericErrorContext,
9504 "PP: try START\n"); break;
9505 case XML_PARSER_MISC:
9506 xmlGenericError(xmlGenericErrorContext,
9507 "PP: try MISC\n");break;
9508 case XML_PARSER_COMMENT:
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: try COMMENT\n");break;
9511 case XML_PARSER_PROLOG:
9512 xmlGenericError(xmlGenericErrorContext,
9513 "PP: try PROLOG\n");break;
9514 case XML_PARSER_START_TAG:
9515 xmlGenericError(xmlGenericErrorContext,
9516 "PP: try START_TAG\n");break;
9517 case XML_PARSER_CONTENT:
9518 xmlGenericError(xmlGenericErrorContext,
9519 "PP: try CONTENT\n");break;
9520 case XML_PARSER_CDATA_SECTION:
9521 xmlGenericError(xmlGenericErrorContext,
9522 "PP: try CDATA_SECTION\n");break;
9523 case XML_PARSER_END_TAG:
9524 xmlGenericError(xmlGenericErrorContext,
9525 "PP: try END_TAG\n");break;
9526 case XML_PARSER_ENTITY_DECL:
9527 xmlGenericError(xmlGenericErrorContext,
9528 "PP: try ENTITY_DECL\n");break;
9529 case XML_PARSER_ENTITY_VALUE:
9530 xmlGenericError(xmlGenericErrorContext,
9531 "PP: try ENTITY_VALUE\n");break;
9532 case XML_PARSER_ATTRIBUTE_VALUE:
9533 xmlGenericError(xmlGenericErrorContext,
9534 "PP: try ATTRIBUTE_VALUE\n");break;
9535 case XML_PARSER_DTD:
9536 xmlGenericError(xmlGenericErrorContext,
9537 "PP: try DTD\n");break;
9538 case XML_PARSER_EPILOG:
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: try EPILOG\n");break;
9541 case XML_PARSER_PI:
9542 xmlGenericError(xmlGenericErrorContext,
9543 "PP: try PI\n");break;
9544 case XML_PARSER_IGNORE:
9545 xmlGenericError(xmlGenericErrorContext,
9546 "PP: try IGNORE\n");break;
9547 }
9548#endif
9549
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009550 if ((ctxt->input != NULL) &&
9551 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009552 xmlSHRINK(ctxt);
9553 ctxt->checkIndex = 0;
9554 }
9555 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009556
Daniel Veillarda880b122003-04-21 21:36:41 +00009557 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009558 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009559 return(0);
9560
9561
Owen Taylor3473f882001-02-23 17:55:21 +00009562 /*
9563 * Pop-up of finished entities.
9564 */
9565 while ((RAW == 0) && (ctxt->inputNr > 1))
9566 xmlPopInput(ctxt);
9567
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009568 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009569 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009570 avail = ctxt->input->length -
9571 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009572 else {
9573 /*
9574 * If we are operating on converted input, try to flush
9575 * remainng chars to avoid them stalling in the non-converted
9576 * buffer.
9577 */
9578 if ((ctxt->input->buf->raw != NULL) &&
9579 (ctxt->input->buf->raw->use > 0)) {
9580 int base = ctxt->input->base -
9581 ctxt->input->buf->buffer->content;
9582 int current = ctxt->input->cur - ctxt->input->base;
9583
9584 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9585 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9586 ctxt->input->cur = ctxt->input->base + current;
9587 ctxt->input->end =
9588 &ctxt->input->buf->buffer->content[
9589 ctxt->input->buf->buffer->use];
9590 }
9591 avail = ctxt->input->buf->buffer->use -
9592 (ctxt->input->cur - ctxt->input->base);
9593 }
Owen Taylor3473f882001-02-23 17:55:21 +00009594 if (avail < 1)
9595 goto done;
9596 switch (ctxt->instate) {
9597 case XML_PARSER_EOF:
9598 /*
9599 * Document parsing is done !
9600 */
9601 goto done;
9602 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009603 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9604 xmlChar start[4];
9605 xmlCharEncoding enc;
9606
9607 /*
9608 * Very first chars read from the document flow.
9609 */
9610 if (avail < 4)
9611 goto done;
9612
9613 /*
9614 * Get the 4 first bytes and decode the charset
9615 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009616 * plug some encoding conversion routines,
9617 * else xmlSwitchEncoding will set to (default)
9618 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009619 */
9620 start[0] = RAW;
9621 start[1] = NXT(1);
9622 start[2] = NXT(2);
9623 start[3] = NXT(3);
9624 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009625 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009626 break;
9627 }
Owen Taylor3473f882001-02-23 17:55:21 +00009628
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009629 if (avail < 2)
9630 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009631 cur = ctxt->input->cur[0];
9632 next = ctxt->input->cur[1];
9633 if (cur == 0) {
9634 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9635 ctxt->sax->setDocumentLocator(ctxt->userData,
9636 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009637 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009638 ctxt->instate = XML_PARSER_EOF;
9639#ifdef DEBUG_PUSH
9640 xmlGenericError(xmlGenericErrorContext,
9641 "PP: entering EOF\n");
9642#endif
9643 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9644 ctxt->sax->endDocument(ctxt->userData);
9645 goto done;
9646 }
9647 if ((cur == '<') && (next == '?')) {
9648 /* PI or XML decl */
9649 if (avail < 5) return(ret);
9650 if ((!terminate) &&
9651 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9652 return(ret);
9653 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9654 ctxt->sax->setDocumentLocator(ctxt->userData,
9655 &xmlDefaultSAXLocator);
9656 if ((ctxt->input->cur[2] == 'x') &&
9657 (ctxt->input->cur[3] == 'm') &&
9658 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009659 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009660 ret += 5;
9661#ifdef DEBUG_PUSH
9662 xmlGenericError(xmlGenericErrorContext,
9663 "PP: Parsing XML Decl\n");
9664#endif
9665 xmlParseXMLDecl(ctxt);
9666 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9667 /*
9668 * The XML REC instructs us to stop parsing right
9669 * here
9670 */
9671 ctxt->instate = XML_PARSER_EOF;
9672 return(0);
9673 }
9674 ctxt->standalone = ctxt->input->standalone;
9675 if ((ctxt->encoding == NULL) &&
9676 (ctxt->input->encoding != NULL))
9677 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9678 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9679 (!ctxt->disableSAX))
9680 ctxt->sax->startDocument(ctxt->userData);
9681 ctxt->instate = XML_PARSER_MISC;
9682#ifdef DEBUG_PUSH
9683 xmlGenericError(xmlGenericErrorContext,
9684 "PP: entering MISC\n");
9685#endif
9686 } else {
9687 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9688 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9689 (!ctxt->disableSAX))
9690 ctxt->sax->startDocument(ctxt->userData);
9691 ctxt->instate = XML_PARSER_MISC;
9692#ifdef DEBUG_PUSH
9693 xmlGenericError(xmlGenericErrorContext,
9694 "PP: entering MISC\n");
9695#endif
9696 }
9697 } else {
9698 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9699 ctxt->sax->setDocumentLocator(ctxt->userData,
9700 &xmlDefaultSAXLocator);
9701 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009702 if (ctxt->version == NULL) {
9703 xmlErrMemory(ctxt, NULL);
9704 break;
9705 }
Owen Taylor3473f882001-02-23 17:55:21 +00009706 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9707 (!ctxt->disableSAX))
9708 ctxt->sax->startDocument(ctxt->userData);
9709 ctxt->instate = XML_PARSER_MISC;
9710#ifdef DEBUG_PUSH
9711 xmlGenericError(xmlGenericErrorContext,
9712 "PP: entering MISC\n");
9713#endif
9714 }
9715 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009716 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009717 const xmlChar *name;
9718 const xmlChar *prefix;
9719 const xmlChar *URI;
9720 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009721
9722 if ((avail < 2) && (ctxt->inputNr == 1))
9723 goto done;
9724 cur = ctxt->input->cur[0];
9725 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009726 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009727 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009728 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9729 ctxt->sax->endDocument(ctxt->userData);
9730 goto done;
9731 }
9732 if (!terminate) {
9733 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009734 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009735 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009736 goto done;
9737 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9738 goto done;
9739 }
9740 }
9741 if (ctxt->spaceNr == 0)
9742 spacePush(ctxt, -1);
9743 else
9744 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009745#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009746 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009747#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009748 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009749#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009750 else
9751 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009752#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009753 if (name == NULL) {
9754 spacePop(ctxt);
9755 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009756 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9757 ctxt->sax->endDocument(ctxt->userData);
9758 goto done;
9759 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009760#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009761 /*
9762 * [ VC: Root Element Type ]
9763 * The Name in the document type declaration must match
9764 * the element type of the root element.
9765 */
9766 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9767 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9768 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009769#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009770
9771 /*
9772 * Check for an Empty Element.
9773 */
9774 if ((RAW == '/') && (NXT(1) == '>')) {
9775 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009776
9777 if (ctxt->sax2) {
9778 if ((ctxt->sax != NULL) &&
9779 (ctxt->sax->endElementNs != NULL) &&
9780 (!ctxt->disableSAX))
9781 ctxt->sax->endElementNs(ctxt->userData, name,
9782 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009783 if (ctxt->nsNr - nsNr > 0)
9784 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009785#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009786 } else {
9787 if ((ctxt->sax != NULL) &&
9788 (ctxt->sax->endElement != NULL) &&
9789 (!ctxt->disableSAX))
9790 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009791#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009792 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009793 spacePop(ctxt);
9794 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009795 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009796 } else {
9797 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009798 }
9799 break;
9800 }
9801 if (RAW == '>') {
9802 NEXT;
9803 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009804 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009805 "Couldn't find end of Start Tag %s\n",
9806 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009807 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009808 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009810 if (ctxt->sax2)
9811 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009812#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009813 else
9814 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009815#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009816
Daniel Veillarda880b122003-04-21 21:36:41 +00009817 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009818 break;
9819 }
9820 case XML_PARSER_CONTENT: {
9821 const xmlChar *test;
9822 unsigned int cons;
9823 if ((avail < 2) && (ctxt->inputNr == 1))
9824 goto done;
9825 cur = ctxt->input->cur[0];
9826 next = ctxt->input->cur[1];
9827
9828 test = CUR_PTR;
9829 cons = ctxt->input->consumed;
9830 if ((cur == '<') && (next == '/')) {
9831 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009832 break;
9833 } else if ((cur == '<') && (next == '?')) {
9834 if ((!terminate) &&
9835 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9836 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009837 xmlParsePI(ctxt);
9838 } else if ((cur == '<') && (next != '!')) {
9839 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009840 break;
9841 } else if ((cur == '<') && (next == '!') &&
9842 (ctxt->input->cur[2] == '-') &&
9843 (ctxt->input->cur[3] == '-')) {
9844 if ((!terminate) &&
9845 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9846 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009847 xmlParseComment(ctxt);
9848 ctxt->instate = XML_PARSER_CONTENT;
9849 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9850 (ctxt->input->cur[2] == '[') &&
9851 (ctxt->input->cur[3] == 'C') &&
9852 (ctxt->input->cur[4] == 'D') &&
9853 (ctxt->input->cur[5] == 'A') &&
9854 (ctxt->input->cur[6] == 'T') &&
9855 (ctxt->input->cur[7] == 'A') &&
9856 (ctxt->input->cur[8] == '[')) {
9857 SKIP(9);
9858 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009859 break;
9860 } else if ((cur == '<') && (next == '!') &&
9861 (avail < 9)) {
9862 goto done;
9863 } else if (cur == '&') {
9864 if ((!terminate) &&
9865 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9866 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009867 xmlParseReference(ctxt);
9868 } else {
9869 /* TODO Avoid the extra copy, handle directly !!! */
9870 /*
9871 * Goal of the following test is:
9872 * - minimize calls to the SAX 'character' callback
9873 * when they are mergeable
9874 * - handle an problem for isBlank when we only parse
9875 * a sequence of blank chars and the next one is
9876 * not available to check against '<' presence.
9877 * - tries to homogenize the differences in SAX
9878 * callbacks between the push and pull versions
9879 * of the parser.
9880 */
9881 if ((ctxt->inputNr == 1) &&
9882 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9883 if (!terminate) {
9884 if (ctxt->progressive) {
9885 if ((lastlt == NULL) ||
9886 (ctxt->input->cur > lastlt))
9887 goto done;
9888 } else if (xmlParseLookupSequence(ctxt,
9889 '<', 0, 0) < 0) {
9890 goto done;
9891 }
9892 }
9893 }
9894 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009895 xmlParseCharData(ctxt, 0);
9896 }
9897 /*
9898 * Pop-up of finished entities.
9899 */
9900 while ((RAW == 0) && (ctxt->inputNr > 1))
9901 xmlPopInput(ctxt);
9902 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009903 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9904 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009905 ctxt->instate = XML_PARSER_EOF;
9906 break;
9907 }
9908 break;
9909 }
9910 case XML_PARSER_END_TAG:
9911 if (avail < 2)
9912 goto done;
9913 if (!terminate) {
9914 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009915 /* > can be found unescaped in attribute values */
9916 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009917 goto done;
9918 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9919 goto done;
9920 }
9921 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009922 if (ctxt->sax2) {
9923 xmlParseEndTag2(ctxt,
9924 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9925 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009926 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009927 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009928 }
9929#ifdef LIBXML_SAX1_ENABLED
9930 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009931 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009932#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009933 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009934 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009935 } else {
9936 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009937 }
9938 break;
9939 case XML_PARSER_CDATA_SECTION: {
9940 /*
9941 * The Push mode need to have the SAX callback for
9942 * cdataBlock merge back contiguous callbacks.
9943 */
9944 int base;
9945
9946 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9947 if (base < 0) {
9948 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009949 int tmp;
9950
9951 tmp = xmlCheckCdataPush(ctxt->input->cur,
9952 XML_PARSER_BIG_BUFFER_SIZE);
9953 if (tmp < 0) {
9954 tmp = -tmp;
9955 ctxt->input->cur += tmp;
9956 goto encoding_error;
9957 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009958 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9959 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009960 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009961 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009962 else if (ctxt->sax->characters != NULL)
9963 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009964 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009965 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009966 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009967 ctxt->checkIndex = 0;
9968 }
9969 goto done;
9970 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009971 int tmp;
9972
9973 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9974 if ((tmp < 0) || (tmp != base)) {
9975 tmp = -tmp;
9976 ctxt->input->cur += tmp;
9977 goto encoding_error;
9978 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009979 if ((ctxt->sax != NULL) && (base > 0) &&
9980 (!ctxt->disableSAX)) {
9981 if (ctxt->sax->cdataBlock != NULL)
9982 ctxt->sax->cdataBlock(ctxt->userData,
9983 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009984 else if (ctxt->sax->characters != NULL)
9985 ctxt->sax->characters(ctxt->userData,
9986 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009987 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009988 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009989 ctxt->checkIndex = 0;
9990 ctxt->instate = XML_PARSER_CONTENT;
9991#ifdef DEBUG_PUSH
9992 xmlGenericError(xmlGenericErrorContext,
9993 "PP: entering CONTENT\n");
9994#endif
9995 }
9996 break;
9997 }
Owen Taylor3473f882001-02-23 17:55:21 +00009998 case XML_PARSER_MISC:
9999 SKIP_BLANKS;
10000 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010001 avail = ctxt->input->length -
10002 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010003 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010004 avail = ctxt->input->buf->buffer->use -
10005 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010006 if (avail < 2)
10007 goto done;
10008 cur = ctxt->input->cur[0];
10009 next = ctxt->input->cur[1];
10010 if ((cur == '<') && (next == '?')) {
10011 if ((!terminate) &&
10012 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10013 goto done;
10014#ifdef DEBUG_PUSH
10015 xmlGenericError(xmlGenericErrorContext,
10016 "PP: Parsing PI\n");
10017#endif
10018 xmlParsePI(ctxt);
10019 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010020 (ctxt->input->cur[2] == '-') &&
10021 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010022 if ((!terminate) &&
10023 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10024 goto done;
10025#ifdef DEBUG_PUSH
10026 xmlGenericError(xmlGenericErrorContext,
10027 "PP: Parsing Comment\n");
10028#endif
10029 xmlParseComment(ctxt);
10030 ctxt->instate = XML_PARSER_MISC;
10031 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010032 (ctxt->input->cur[2] == 'D') &&
10033 (ctxt->input->cur[3] == 'O') &&
10034 (ctxt->input->cur[4] == 'C') &&
10035 (ctxt->input->cur[5] == 'T') &&
10036 (ctxt->input->cur[6] == 'Y') &&
10037 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010038 (ctxt->input->cur[8] == 'E')) {
10039 if ((!terminate) &&
10040 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10041 goto done;
10042#ifdef DEBUG_PUSH
10043 xmlGenericError(xmlGenericErrorContext,
10044 "PP: Parsing internal subset\n");
10045#endif
10046 ctxt->inSubset = 1;
10047 xmlParseDocTypeDecl(ctxt);
10048 if (RAW == '[') {
10049 ctxt->instate = XML_PARSER_DTD;
10050#ifdef DEBUG_PUSH
10051 xmlGenericError(xmlGenericErrorContext,
10052 "PP: entering DTD\n");
10053#endif
10054 } else {
10055 /*
10056 * Create and update the external subset.
10057 */
10058 ctxt->inSubset = 2;
10059 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10060 (ctxt->sax->externalSubset != NULL))
10061 ctxt->sax->externalSubset(ctxt->userData,
10062 ctxt->intSubName, ctxt->extSubSystem,
10063 ctxt->extSubURI);
10064 ctxt->inSubset = 0;
10065 ctxt->instate = XML_PARSER_PROLOG;
10066#ifdef DEBUG_PUSH
10067 xmlGenericError(xmlGenericErrorContext,
10068 "PP: entering PROLOG\n");
10069#endif
10070 }
10071 } else if ((cur == '<') && (next == '!') &&
10072 (avail < 9)) {
10073 goto done;
10074 } else {
10075 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010076 ctxt->progressive = 1;
10077 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010078#ifdef DEBUG_PUSH
10079 xmlGenericError(xmlGenericErrorContext,
10080 "PP: entering START_TAG\n");
10081#endif
10082 }
10083 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010084 case XML_PARSER_PROLOG:
10085 SKIP_BLANKS;
10086 if (ctxt->input->buf == NULL)
10087 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10088 else
10089 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10090 if (avail < 2)
10091 goto done;
10092 cur = ctxt->input->cur[0];
10093 next = ctxt->input->cur[1];
10094 if ((cur == '<') && (next == '?')) {
10095 if ((!terminate) &&
10096 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10097 goto done;
10098#ifdef DEBUG_PUSH
10099 xmlGenericError(xmlGenericErrorContext,
10100 "PP: Parsing PI\n");
10101#endif
10102 xmlParsePI(ctxt);
10103 } else if ((cur == '<') && (next == '!') &&
10104 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10105 if ((!terminate) &&
10106 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10107 goto done;
10108#ifdef DEBUG_PUSH
10109 xmlGenericError(xmlGenericErrorContext,
10110 "PP: Parsing Comment\n");
10111#endif
10112 xmlParseComment(ctxt);
10113 ctxt->instate = XML_PARSER_PROLOG;
10114 } else if ((cur == '<') && (next == '!') &&
10115 (avail < 4)) {
10116 goto done;
10117 } else {
10118 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010119 if (ctxt->progressive == 0)
10120 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010121 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010122#ifdef DEBUG_PUSH
10123 xmlGenericError(xmlGenericErrorContext,
10124 "PP: entering START_TAG\n");
10125#endif
10126 }
10127 break;
10128 case XML_PARSER_EPILOG:
10129 SKIP_BLANKS;
10130 if (ctxt->input->buf == NULL)
10131 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10132 else
10133 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10134 if (avail < 2)
10135 goto done;
10136 cur = ctxt->input->cur[0];
10137 next = ctxt->input->cur[1];
10138 if ((cur == '<') && (next == '?')) {
10139 if ((!terminate) &&
10140 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10141 goto done;
10142#ifdef DEBUG_PUSH
10143 xmlGenericError(xmlGenericErrorContext,
10144 "PP: Parsing PI\n");
10145#endif
10146 xmlParsePI(ctxt);
10147 ctxt->instate = XML_PARSER_EPILOG;
10148 } else if ((cur == '<') && (next == '!') &&
10149 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10150 if ((!terminate) &&
10151 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10152 goto done;
10153#ifdef DEBUG_PUSH
10154 xmlGenericError(xmlGenericErrorContext,
10155 "PP: Parsing Comment\n");
10156#endif
10157 xmlParseComment(ctxt);
10158 ctxt->instate = XML_PARSER_EPILOG;
10159 } else if ((cur == '<') && (next == '!') &&
10160 (avail < 4)) {
10161 goto done;
10162 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010163 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010164 ctxt->instate = XML_PARSER_EOF;
10165#ifdef DEBUG_PUSH
10166 xmlGenericError(xmlGenericErrorContext,
10167 "PP: entering EOF\n");
10168#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010169 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010170 ctxt->sax->endDocument(ctxt->userData);
10171 goto done;
10172 }
10173 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010174 case XML_PARSER_DTD: {
10175 /*
10176 * Sorry but progressive parsing of the internal subset
10177 * is not expected to be supported. We first check that
10178 * the full content of the internal subset is available and
10179 * the parsing is launched only at that point.
10180 * Internal subset ends up with "']' S? '>'" in an unescaped
10181 * section and not in a ']]>' sequence which are conditional
10182 * sections (whoever argued to keep that crap in XML deserve
10183 * a place in hell !).
10184 */
10185 int base, i;
10186 xmlChar *buf;
10187 xmlChar quote = 0;
10188
10189 base = ctxt->input->cur - ctxt->input->base;
10190 if (base < 0) return(0);
10191 if (ctxt->checkIndex > base)
10192 base = ctxt->checkIndex;
10193 buf = ctxt->input->buf->buffer->content;
10194 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10195 base++) {
10196 if (quote != 0) {
10197 if (buf[base] == quote)
10198 quote = 0;
10199 continue;
10200 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010201 if ((quote == 0) && (buf[base] == '<')) {
10202 int found = 0;
10203 /* special handling of comments */
10204 if (((unsigned int) base + 4 <
10205 ctxt->input->buf->buffer->use) &&
10206 (buf[base + 1] == '!') &&
10207 (buf[base + 2] == '-') &&
10208 (buf[base + 3] == '-')) {
10209 for (;(unsigned int) base + 3 <
10210 ctxt->input->buf->buffer->use; base++) {
10211 if ((buf[base] == '-') &&
10212 (buf[base + 1] == '-') &&
10213 (buf[base + 2] == '>')) {
10214 found = 1;
10215 base += 2;
10216 break;
10217 }
10218 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010219 if (!found) {
10220#if 0
10221 fprintf(stderr, "unfinished comment\n");
10222#endif
10223 break; /* for */
10224 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010225 continue;
10226 }
10227 }
Owen Taylor3473f882001-02-23 17:55:21 +000010228 if (buf[base] == '"') {
10229 quote = '"';
10230 continue;
10231 }
10232 if (buf[base] == '\'') {
10233 quote = '\'';
10234 continue;
10235 }
10236 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010237#if 0
10238 fprintf(stderr, "%c%c%c%c: ", buf[base],
10239 buf[base + 1], buf[base + 2], buf[base + 3]);
10240#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010241 if ((unsigned int) base +1 >=
10242 ctxt->input->buf->buffer->use)
10243 break;
10244 if (buf[base + 1] == ']') {
10245 /* conditional crap, skip both ']' ! */
10246 base++;
10247 continue;
10248 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010249 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010250 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10251 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010252 if (buf[base + i] == '>') {
10253#if 0
10254 fprintf(stderr, "found\n");
10255#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010256 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010257 }
10258 if (!IS_BLANK_CH(buf[base + i])) {
10259#if 0
10260 fprintf(stderr, "not found\n");
10261#endif
10262 goto not_end_of_int_subset;
10263 }
Owen Taylor3473f882001-02-23 17:55:21 +000010264 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010265#if 0
10266 fprintf(stderr, "end of stream\n");
10267#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010268 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010269
Owen Taylor3473f882001-02-23 17:55:21 +000010270 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010271not_end_of_int_subset:
10272 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
10274 /*
10275 * We didn't found the end of the Internal subset
10276 */
Owen Taylor3473f882001-02-23 17:55:21 +000010277#ifdef DEBUG_PUSH
10278 if (next == 0)
10279 xmlGenericError(xmlGenericErrorContext,
10280 "PP: lookup of int subset end filed\n");
10281#endif
10282 goto done;
10283
10284found_end_int_subset:
10285 xmlParseInternalSubset(ctxt);
10286 ctxt->inSubset = 2;
10287 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10288 (ctxt->sax->externalSubset != NULL))
10289 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10290 ctxt->extSubSystem, ctxt->extSubURI);
10291 ctxt->inSubset = 0;
10292 ctxt->instate = XML_PARSER_PROLOG;
10293 ctxt->checkIndex = 0;
10294#ifdef DEBUG_PUSH
10295 xmlGenericError(xmlGenericErrorContext,
10296 "PP: entering PROLOG\n");
10297#endif
10298 break;
10299 }
10300 case XML_PARSER_COMMENT:
10301 xmlGenericError(xmlGenericErrorContext,
10302 "PP: internal error, state == COMMENT\n");
10303 ctxt->instate = XML_PARSER_CONTENT;
10304#ifdef DEBUG_PUSH
10305 xmlGenericError(xmlGenericErrorContext,
10306 "PP: entering CONTENT\n");
10307#endif
10308 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010309 case XML_PARSER_IGNORE:
10310 xmlGenericError(xmlGenericErrorContext,
10311 "PP: internal error, state == IGNORE");
10312 ctxt->instate = XML_PARSER_DTD;
10313#ifdef DEBUG_PUSH
10314 xmlGenericError(xmlGenericErrorContext,
10315 "PP: entering DTD\n");
10316#endif
10317 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010318 case XML_PARSER_PI:
10319 xmlGenericError(xmlGenericErrorContext,
10320 "PP: internal error, state == PI\n");
10321 ctxt->instate = XML_PARSER_CONTENT;
10322#ifdef DEBUG_PUSH
10323 xmlGenericError(xmlGenericErrorContext,
10324 "PP: entering CONTENT\n");
10325#endif
10326 break;
10327 case XML_PARSER_ENTITY_DECL:
10328 xmlGenericError(xmlGenericErrorContext,
10329 "PP: internal error, state == ENTITY_DECL\n");
10330 ctxt->instate = XML_PARSER_DTD;
10331#ifdef DEBUG_PUSH
10332 xmlGenericError(xmlGenericErrorContext,
10333 "PP: entering DTD\n");
10334#endif
10335 break;
10336 case XML_PARSER_ENTITY_VALUE:
10337 xmlGenericError(xmlGenericErrorContext,
10338 "PP: internal error, state == ENTITY_VALUE\n");
10339 ctxt->instate = XML_PARSER_CONTENT;
10340#ifdef DEBUG_PUSH
10341 xmlGenericError(xmlGenericErrorContext,
10342 "PP: entering DTD\n");
10343#endif
10344 break;
10345 case XML_PARSER_ATTRIBUTE_VALUE:
10346 xmlGenericError(xmlGenericErrorContext,
10347 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10348 ctxt->instate = XML_PARSER_START_TAG;
10349#ifdef DEBUG_PUSH
10350 xmlGenericError(xmlGenericErrorContext,
10351 "PP: entering START_TAG\n");
10352#endif
10353 break;
10354 case XML_PARSER_SYSTEM_LITERAL:
10355 xmlGenericError(xmlGenericErrorContext,
10356 "PP: internal error, state == SYSTEM_LITERAL\n");
10357 ctxt->instate = XML_PARSER_START_TAG;
10358#ifdef DEBUG_PUSH
10359 xmlGenericError(xmlGenericErrorContext,
10360 "PP: entering START_TAG\n");
10361#endif
10362 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010363 case XML_PARSER_PUBLIC_LITERAL:
10364 xmlGenericError(xmlGenericErrorContext,
10365 "PP: internal error, state == PUBLIC_LITERAL\n");
10366 ctxt->instate = XML_PARSER_START_TAG;
10367#ifdef DEBUG_PUSH
10368 xmlGenericError(xmlGenericErrorContext,
10369 "PP: entering START_TAG\n");
10370#endif
10371 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010372 }
10373 }
10374done:
10375#ifdef DEBUG_PUSH
10376 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10377#endif
10378 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010379encoding_error:
10380 {
10381 char buffer[150];
10382
10383 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10384 ctxt->input->cur[0], ctxt->input->cur[1],
10385 ctxt->input->cur[2], ctxt->input->cur[3]);
10386 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10387 "Input is not proper UTF-8, indicate encoding !\n%s",
10388 BAD_CAST buffer, NULL);
10389 }
10390 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010391}
10392
10393/**
Owen Taylor3473f882001-02-23 17:55:21 +000010394 * xmlParseChunk:
10395 * @ctxt: an XML parser context
10396 * @chunk: an char array
10397 * @size: the size in byte of the chunk
10398 * @terminate: last chunk indicator
10399 *
10400 * Parse a Chunk of memory
10401 *
10402 * Returns zero if no error, the xmlParserErrors otherwise.
10403 */
10404int
10405xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10406 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010407 if (ctxt == NULL)
10408 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010409 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010410 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010411 if (ctxt->instate == XML_PARSER_START)
10412 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010413 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10414 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10415 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10416 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010417 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010418
William M. Bracka3215c72004-07-31 16:24:01 +000010419 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10420 if (res < 0) {
10421 ctxt->errNo = XML_PARSER_EOF;
10422 ctxt->disableSAX = 1;
10423 return (XML_PARSER_EOF);
10424 }
Owen Taylor3473f882001-02-23 17:55:21 +000010425 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10426 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010427 ctxt->input->end =
10428 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010429#ifdef DEBUG_PUSH
10430 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10431#endif
10432
Owen Taylor3473f882001-02-23 17:55:21 +000010433 } else if (ctxt->instate != XML_PARSER_EOF) {
10434 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10435 xmlParserInputBufferPtr in = ctxt->input->buf;
10436 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10437 (in->raw != NULL)) {
10438 int nbchars;
10439
10440 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10441 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010442 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010443 xmlGenericError(xmlGenericErrorContext,
10444 "xmlParseChunk: encoder error\n");
10445 return(XML_ERR_INVALID_ENCODING);
10446 }
10447 }
10448 }
10449 }
10450 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010451 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010452 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010453 if (terminate) {
10454 /*
10455 * Check for termination
10456 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010457 int avail = 0;
10458
10459 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010460 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010461 avail = ctxt->input->length -
10462 (ctxt->input->cur - ctxt->input->base);
10463 else
10464 avail = ctxt->input->buf->buffer->use -
10465 (ctxt->input->cur - ctxt->input->base);
10466 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010467
Owen Taylor3473f882001-02-23 17:55:21 +000010468 if ((ctxt->instate != XML_PARSER_EOF) &&
10469 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010470 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010471 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010472 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010473 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010474 }
Owen Taylor3473f882001-02-23 17:55:21 +000010475 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010476 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010477 ctxt->sax->endDocument(ctxt->userData);
10478 }
10479 ctxt->instate = XML_PARSER_EOF;
10480 }
10481 return((xmlParserErrors) ctxt->errNo);
10482}
10483
10484/************************************************************************
10485 * *
10486 * I/O front end functions to the parser *
10487 * *
10488 ************************************************************************/
10489
10490/**
Owen Taylor3473f882001-02-23 17:55:21 +000010491 * xmlCreatePushParserCtxt:
10492 * @sax: a SAX handler
10493 * @user_data: The user data returned on SAX callbacks
10494 * @chunk: a pointer to an array of chars
10495 * @size: number of chars in the array
10496 * @filename: an optional file name or URI
10497 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010498 * Create a parser context for using the XML parser in push mode.
10499 * If @buffer and @size are non-NULL, the data is used to detect
10500 * the encoding. The remaining characters will be parsed so they
10501 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010502 * To allow content encoding detection, @size should be >= 4
10503 * The value of @filename is used for fetching external entities
10504 * and error/warning reports.
10505 *
10506 * Returns the new parser context or NULL
10507 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010508
Owen Taylor3473f882001-02-23 17:55:21 +000010509xmlParserCtxtPtr
10510xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10511 const char *chunk, int size, const char *filename) {
10512 xmlParserCtxtPtr ctxt;
10513 xmlParserInputPtr inputStream;
10514 xmlParserInputBufferPtr buf;
10515 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10516
10517 /*
10518 * plug some encoding conversion routines
10519 */
10520 if ((chunk != NULL) && (size >= 4))
10521 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10522
10523 buf = xmlAllocParserInputBuffer(enc);
10524 if (buf == NULL) return(NULL);
10525
10526 ctxt = xmlNewParserCtxt();
10527 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010528 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010529 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010530 return(NULL);
10531 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010532 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010533 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10534 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010535 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010536 xmlFreeParserInputBuffer(buf);
10537 xmlFreeParserCtxt(ctxt);
10538 return(NULL);
10539 }
Owen Taylor3473f882001-02-23 17:55:21 +000010540 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010541#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010542 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010543#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010544 xmlFree(ctxt->sax);
10545 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10546 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010547 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010548 xmlFreeParserInputBuffer(buf);
10549 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010550 return(NULL);
10551 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010552 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10553 if (sax->initialized == XML_SAX2_MAGIC)
10554 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10555 else
10556 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010557 if (user_data != NULL)
10558 ctxt->userData = user_data;
10559 }
10560 if (filename == NULL) {
10561 ctxt->directory = NULL;
10562 } else {
10563 ctxt->directory = xmlParserGetDirectory(filename);
10564 }
10565
10566 inputStream = xmlNewInputStream(ctxt);
10567 if (inputStream == NULL) {
10568 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010569 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010570 return(NULL);
10571 }
10572
10573 if (filename == NULL)
10574 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010575 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010576 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010577 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010578 if (inputStream->filename == NULL) {
10579 xmlFreeParserCtxt(ctxt);
10580 xmlFreeParserInputBuffer(buf);
10581 return(NULL);
10582 }
10583 }
Owen Taylor3473f882001-02-23 17:55:21 +000010584 inputStream->buf = buf;
10585 inputStream->base = inputStream->buf->buffer->content;
10586 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010587 inputStream->end =
10588 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010589
10590 inputPush(ctxt, inputStream);
10591
William M. Brack3a1cd212005-02-11 14:35:54 +000010592 /*
10593 * If the caller didn't provide an initial 'chunk' for determining
10594 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10595 * that it can be automatically determined later
10596 */
10597 if ((size == 0) || (chunk == NULL)) {
10598 ctxt->charset = XML_CHAR_ENCODING_NONE;
10599 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010600 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10601 int cur = ctxt->input->cur - ctxt->input->base;
10602
Owen Taylor3473f882001-02-23 17:55:21 +000010603 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010604
10605 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10606 ctxt->input->cur = ctxt->input->base + cur;
10607 ctxt->input->end =
10608 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010609#ifdef DEBUG_PUSH
10610 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10611#endif
10612 }
10613
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010614 if (enc != XML_CHAR_ENCODING_NONE) {
10615 xmlSwitchEncoding(ctxt, enc);
10616 }
10617
Owen Taylor3473f882001-02-23 17:55:21 +000010618 return(ctxt);
10619}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010620#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010621
10622/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010623 * xmlStopParser:
10624 * @ctxt: an XML parser context
10625 *
10626 * Blocks further parser processing
10627 */
10628void
10629xmlStopParser(xmlParserCtxtPtr ctxt) {
10630 if (ctxt == NULL)
10631 return;
10632 ctxt->instate = XML_PARSER_EOF;
10633 ctxt->disableSAX = 1;
10634 if (ctxt->input != NULL) {
10635 ctxt->input->cur = BAD_CAST"";
10636 ctxt->input->base = ctxt->input->cur;
10637 }
10638}
10639
10640/**
Owen Taylor3473f882001-02-23 17:55:21 +000010641 * xmlCreateIOParserCtxt:
10642 * @sax: a SAX handler
10643 * @user_data: The user data returned on SAX callbacks
10644 * @ioread: an I/O read function
10645 * @ioclose: an I/O close function
10646 * @ioctx: an I/O handler
10647 * @enc: the charset encoding if known
10648 *
10649 * Create a parser context for using the XML parser with an existing
10650 * I/O stream
10651 *
10652 * Returns the new parser context or NULL
10653 */
10654xmlParserCtxtPtr
10655xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10656 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10657 void *ioctx, xmlCharEncoding enc) {
10658 xmlParserCtxtPtr ctxt;
10659 xmlParserInputPtr inputStream;
10660 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010661
10662 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010663
10664 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10665 if (buf == NULL) return(NULL);
10666
10667 ctxt = xmlNewParserCtxt();
10668 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010669 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010670 return(NULL);
10671 }
10672 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010673#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010674 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010675#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010676 xmlFree(ctxt->sax);
10677 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10678 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010679 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010680 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010681 return(NULL);
10682 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010683 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10684 if (sax->initialized == XML_SAX2_MAGIC)
10685 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10686 else
10687 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010688 if (user_data != NULL)
10689 ctxt->userData = user_data;
10690 }
10691
10692 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10693 if (inputStream == NULL) {
10694 xmlFreeParserCtxt(ctxt);
10695 return(NULL);
10696 }
10697 inputPush(ctxt, inputStream);
10698
10699 return(ctxt);
10700}
10701
Daniel Veillard4432df22003-09-28 18:58:27 +000010702#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010703/************************************************************************
10704 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010705 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010706 * *
10707 ************************************************************************/
10708
10709/**
10710 * xmlIOParseDTD:
10711 * @sax: the SAX handler block or NULL
10712 * @input: an Input Buffer
10713 * @enc: the charset encoding if known
10714 *
10715 * Load and parse a DTD
10716 *
10717 * Returns the resulting xmlDtdPtr or NULL in case of error.
10718 * @input will be freed at parsing end.
10719 */
10720
10721xmlDtdPtr
10722xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10723 xmlCharEncoding enc) {
10724 xmlDtdPtr ret = NULL;
10725 xmlParserCtxtPtr ctxt;
10726 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010727 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010728
10729 if (input == NULL)
10730 return(NULL);
10731
10732 ctxt = xmlNewParserCtxt();
10733 if (ctxt == NULL) {
10734 return(NULL);
10735 }
10736
10737 /*
10738 * Set-up the SAX context
10739 */
10740 if (sax != NULL) {
10741 if (ctxt->sax != NULL)
10742 xmlFree(ctxt->sax);
10743 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010744 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010745 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010746 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010747
10748 /*
10749 * generate a parser input from the I/O handler
10750 */
10751
Daniel Veillard43caefb2003-12-07 19:32:22 +000010752 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010753 if (pinput == NULL) {
10754 if (sax != NULL) ctxt->sax = NULL;
10755 xmlFreeParserCtxt(ctxt);
10756 return(NULL);
10757 }
10758
10759 /*
10760 * plug some encoding conversion routines here.
10761 */
10762 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010763 if (enc != XML_CHAR_ENCODING_NONE) {
10764 xmlSwitchEncoding(ctxt, enc);
10765 }
Owen Taylor3473f882001-02-23 17:55:21 +000010766
10767 pinput->filename = NULL;
10768 pinput->line = 1;
10769 pinput->col = 1;
10770 pinput->base = ctxt->input->cur;
10771 pinput->cur = ctxt->input->cur;
10772 pinput->free = NULL;
10773
10774 /*
10775 * let's parse that entity knowing it's an external subset.
10776 */
10777 ctxt->inSubset = 2;
10778 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10779 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10780 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010781
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010782 if ((enc == XML_CHAR_ENCODING_NONE) &&
10783 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010784 /*
10785 * Get the 4 first bytes and decode the charset
10786 * if enc != XML_CHAR_ENCODING_NONE
10787 * plug some encoding conversion routines.
10788 */
10789 start[0] = RAW;
10790 start[1] = NXT(1);
10791 start[2] = NXT(2);
10792 start[3] = NXT(3);
10793 enc = xmlDetectCharEncoding(start, 4);
10794 if (enc != XML_CHAR_ENCODING_NONE) {
10795 xmlSwitchEncoding(ctxt, enc);
10796 }
10797 }
10798
Owen Taylor3473f882001-02-23 17:55:21 +000010799 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10800
10801 if (ctxt->myDoc != NULL) {
10802 if (ctxt->wellFormed) {
10803 ret = ctxt->myDoc->extSubset;
10804 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010805 if (ret != NULL) {
10806 xmlNodePtr tmp;
10807
10808 ret->doc = NULL;
10809 tmp = ret->children;
10810 while (tmp != NULL) {
10811 tmp->doc = NULL;
10812 tmp = tmp->next;
10813 }
10814 }
Owen Taylor3473f882001-02-23 17:55:21 +000010815 } else {
10816 ret = NULL;
10817 }
10818 xmlFreeDoc(ctxt->myDoc);
10819 ctxt->myDoc = NULL;
10820 }
10821 if (sax != NULL) ctxt->sax = NULL;
10822 xmlFreeParserCtxt(ctxt);
10823
10824 return(ret);
10825}
10826
10827/**
10828 * xmlSAXParseDTD:
10829 * @sax: the SAX handler block
10830 * @ExternalID: a NAME* containing the External ID of the DTD
10831 * @SystemID: a NAME* containing the URL to the DTD
10832 *
10833 * Load and parse an external subset.
10834 *
10835 * Returns the resulting xmlDtdPtr or NULL in case of error.
10836 */
10837
10838xmlDtdPtr
10839xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10840 const xmlChar *SystemID) {
10841 xmlDtdPtr ret = NULL;
10842 xmlParserCtxtPtr ctxt;
10843 xmlParserInputPtr input = NULL;
10844 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010845 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010846
10847 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10848
10849 ctxt = xmlNewParserCtxt();
10850 if (ctxt == NULL) {
10851 return(NULL);
10852 }
10853
10854 /*
10855 * Set-up the SAX context
10856 */
10857 if (sax != NULL) {
10858 if (ctxt->sax != NULL)
10859 xmlFree(ctxt->sax);
10860 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010861 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010862 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010863
10864 /*
10865 * Canonicalise the system ID
10866 */
10867 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010868 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010869 xmlFreeParserCtxt(ctxt);
10870 return(NULL);
10871 }
Owen Taylor3473f882001-02-23 17:55:21 +000010872
10873 /*
10874 * Ask the Entity resolver to load the damn thing
10875 */
10876
10877 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010878 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010879 if (input == NULL) {
10880 if (sax != NULL) ctxt->sax = NULL;
10881 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010882 if (systemIdCanonic != NULL)
10883 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010884 return(NULL);
10885 }
10886
10887 /*
10888 * plug some encoding conversion routines here.
10889 */
10890 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010891 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10892 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10893 xmlSwitchEncoding(ctxt, enc);
10894 }
Owen Taylor3473f882001-02-23 17:55:21 +000010895
10896 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010897 input->filename = (char *) systemIdCanonic;
10898 else
10899 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010900 input->line = 1;
10901 input->col = 1;
10902 input->base = ctxt->input->cur;
10903 input->cur = ctxt->input->cur;
10904 input->free = NULL;
10905
10906 /*
10907 * let's parse that entity knowing it's an external subset.
10908 */
10909 ctxt->inSubset = 2;
10910 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10911 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10912 ExternalID, SystemID);
10913 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10914
10915 if (ctxt->myDoc != NULL) {
10916 if (ctxt->wellFormed) {
10917 ret = ctxt->myDoc->extSubset;
10918 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010919 if (ret != NULL) {
10920 xmlNodePtr tmp;
10921
10922 ret->doc = NULL;
10923 tmp = ret->children;
10924 while (tmp != NULL) {
10925 tmp->doc = NULL;
10926 tmp = tmp->next;
10927 }
10928 }
Owen Taylor3473f882001-02-23 17:55:21 +000010929 } else {
10930 ret = NULL;
10931 }
10932 xmlFreeDoc(ctxt->myDoc);
10933 ctxt->myDoc = NULL;
10934 }
10935 if (sax != NULL) ctxt->sax = NULL;
10936 xmlFreeParserCtxt(ctxt);
10937
10938 return(ret);
10939}
10940
Daniel Veillard4432df22003-09-28 18:58:27 +000010941
Owen Taylor3473f882001-02-23 17:55:21 +000010942/**
10943 * xmlParseDTD:
10944 * @ExternalID: a NAME* containing the External ID of the DTD
10945 * @SystemID: a NAME* containing the URL to the DTD
10946 *
10947 * Load and parse an external subset.
10948 *
10949 * Returns the resulting xmlDtdPtr or NULL in case of error.
10950 */
10951
10952xmlDtdPtr
10953xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10954 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10955}
Daniel Veillard4432df22003-09-28 18:58:27 +000010956#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010957
10958/************************************************************************
10959 * *
10960 * Front ends when parsing an Entity *
10961 * *
10962 ************************************************************************/
10963
10964/**
Owen Taylor3473f882001-02-23 17:55:21 +000010965 * xmlParseCtxtExternalEntity:
10966 * @ctx: the existing parsing context
10967 * @URL: the URL for the entity to load
10968 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010969 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010970 *
10971 * Parse an external general entity within an existing parsing context
10972 * An external general parsed entity is well-formed if it matches the
10973 * production labeled extParsedEnt.
10974 *
10975 * [78] extParsedEnt ::= TextDecl? content
10976 *
10977 * Returns 0 if the entity is well formed, -1 in case of args problem and
10978 * the parser error code otherwise
10979 */
10980
10981int
10982xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010983 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010984 xmlParserCtxtPtr ctxt;
10985 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010986 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010987 xmlSAXHandlerPtr oldsax = NULL;
10988 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010989 xmlChar start[4];
10990 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010991
Daniel Veillardce682bc2004-11-05 17:22:25 +000010992 if (ctx == NULL) return(-1);
10993
Owen Taylor3473f882001-02-23 17:55:21 +000010994 if (ctx->depth > 40) {
10995 return(XML_ERR_ENTITY_LOOP);
10996 }
10997
Daniel Veillardcda96922001-08-21 10:56:31 +000010998 if (lst != NULL)
10999 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011000 if ((URL == NULL) && (ID == NULL))
11001 return(-1);
11002 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11003 return(-1);
11004
11005
11006 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11007 if (ctxt == NULL) return(-1);
11008 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011009 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011010 oldsax = ctxt->sax;
11011 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011012 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011013 newDoc = xmlNewDoc(BAD_CAST "1.0");
11014 if (newDoc == NULL) {
11015 xmlFreeParserCtxt(ctxt);
11016 return(-1);
11017 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011018 if (ctx->myDoc->dict) {
11019 newDoc->dict = ctx->myDoc->dict;
11020 xmlDictReference(newDoc->dict);
11021 }
Owen Taylor3473f882001-02-23 17:55:21 +000011022 if (ctx->myDoc != NULL) {
11023 newDoc->intSubset = ctx->myDoc->intSubset;
11024 newDoc->extSubset = ctx->myDoc->extSubset;
11025 }
11026 if (ctx->myDoc->URL != NULL) {
11027 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11028 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011029 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11030 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011031 ctxt->sax = oldsax;
11032 xmlFreeParserCtxt(ctxt);
11033 newDoc->intSubset = NULL;
11034 newDoc->extSubset = NULL;
11035 xmlFreeDoc(newDoc);
11036 return(-1);
11037 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011038 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011039 nodePush(ctxt, newDoc->children);
11040 if (ctx->myDoc == NULL) {
11041 ctxt->myDoc = newDoc;
11042 } else {
11043 ctxt->myDoc = ctx->myDoc;
11044 newDoc->children->doc = ctx->myDoc;
11045 }
11046
Daniel Veillard87a764e2001-06-20 17:41:10 +000011047 /*
11048 * Get the 4 first bytes and decode the charset
11049 * if enc != XML_CHAR_ENCODING_NONE
11050 * plug some encoding conversion routines.
11051 */
11052 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011053 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11054 start[0] = RAW;
11055 start[1] = NXT(1);
11056 start[2] = NXT(2);
11057 start[3] = NXT(3);
11058 enc = xmlDetectCharEncoding(start, 4);
11059 if (enc != XML_CHAR_ENCODING_NONE) {
11060 xmlSwitchEncoding(ctxt, enc);
11061 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011062 }
11063
Owen Taylor3473f882001-02-23 17:55:21 +000011064 /*
11065 * Parse a possible text declaration first
11066 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011067 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011068 xmlParseTextDecl(ctxt);
11069 }
11070
11071 /*
11072 * Doing validity checking on chunk doesn't make sense
11073 */
11074 ctxt->instate = XML_PARSER_CONTENT;
11075 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011076 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011077 ctxt->loadsubset = ctx->loadsubset;
11078 ctxt->depth = ctx->depth + 1;
11079 ctxt->replaceEntities = ctx->replaceEntities;
11080 if (ctxt->validate) {
11081 ctxt->vctxt.error = ctx->vctxt.error;
11082 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011083 } else {
11084 ctxt->vctxt.error = NULL;
11085 ctxt->vctxt.warning = NULL;
11086 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011087 ctxt->vctxt.nodeTab = NULL;
11088 ctxt->vctxt.nodeNr = 0;
11089 ctxt->vctxt.nodeMax = 0;
11090 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011091 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11092 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011093 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11094 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11095 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011096 ctxt->dictNames = ctx->dictNames;
11097 ctxt->attsDefault = ctx->attsDefault;
11098 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011099 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011100
11101 xmlParseContent(ctxt);
11102
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011103 ctx->validate = ctxt->validate;
11104 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011105 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011106 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011107 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011108 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011109 }
11110 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011111 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011112 }
11113
11114 if (!ctxt->wellFormed) {
11115 if (ctxt->errNo == 0)
11116 ret = 1;
11117 else
11118 ret = ctxt->errNo;
11119 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011120 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011121 xmlNodePtr cur;
11122
11123 /*
11124 * Return the newly created nodeset after unlinking it from
11125 * they pseudo parent.
11126 */
11127 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011128 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011129 while (cur != NULL) {
11130 cur->parent = NULL;
11131 cur = cur->next;
11132 }
11133 newDoc->children->children = NULL;
11134 }
11135 ret = 0;
11136 }
11137 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011138 ctxt->dict = NULL;
11139 ctxt->attsDefault = NULL;
11140 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011141 xmlFreeParserCtxt(ctxt);
11142 newDoc->intSubset = NULL;
11143 newDoc->extSubset = NULL;
11144 xmlFreeDoc(newDoc);
11145
11146 return(ret);
11147}
11148
11149/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011150 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011151 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011152 * @oldctxt: the previous parser context if available
 * @sax:  the SAX handler block (possibly NULL)
11154 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11155 * @depth: Used for loop detection, use 0
11156 * @URL: the URL for the entity to load
11157 * @ID: the System ID for the entity to load
11158 * @list: the return value for the set of parsed nodes
11159 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011160 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011161 *
11162 * Returns 0 if the entity is well formed, -1 in case of args problem and
11163 * the parser error code otherwise
11164 */
11165
Daniel Veillard7d515752003-09-26 19:12:37 +000011166static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011167xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11168 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011169 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011170 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011171 xmlParserCtxtPtr ctxt;
11172 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011173 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011174 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011175 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011176 xmlChar start[4];
11177 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011178
11179 if (depth > 40) {
11180 return(XML_ERR_ENTITY_LOOP);
11181 }
11182
11183
11184
11185 if (list != NULL)
11186 *list = NULL;
11187 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011188 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011189 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011190 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011191
11192
11193 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011194 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011195 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011196 if (oldctxt != NULL) {
11197 ctxt->_private = oldctxt->_private;
11198 ctxt->loadsubset = oldctxt->loadsubset;
11199 ctxt->validate = oldctxt->validate;
11200 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011201 ctxt->record_info = oldctxt->record_info;
11202 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11203 ctxt->node_seq.length = oldctxt->node_seq.length;
11204 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011205 } else {
11206 /*
11207 * Doing validity checking on chunk without context
11208 * doesn't make sense
11209 */
11210 ctxt->_private = NULL;
11211 ctxt->validate = 0;
11212 ctxt->external = 2;
11213 ctxt->loadsubset = 0;
11214 }
Owen Taylor3473f882001-02-23 17:55:21 +000011215 if (sax != NULL) {
11216 oldsax = ctxt->sax;
11217 ctxt->sax = sax;
11218 if (user_data != NULL)
11219 ctxt->userData = user_data;
11220 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011221 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011222 newDoc = xmlNewDoc(BAD_CAST "1.0");
11223 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011224 ctxt->node_seq.maximum = 0;
11225 ctxt->node_seq.length = 0;
11226 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011227 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011228 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011229 }
11230 if (doc != NULL) {
11231 newDoc->intSubset = doc->intSubset;
11232 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011233 newDoc->dict = doc->dict;
11234 } else if (oldctxt != NULL) {
11235 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011236 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011237 xmlDictReference(newDoc->dict);
11238
Owen Taylor3473f882001-02-23 17:55:21 +000011239 if (doc->URL != NULL) {
11240 newDoc->URL = xmlStrdup(doc->URL);
11241 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011242 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11243 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011244 if (sax != NULL)
11245 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011246 ctxt->node_seq.maximum = 0;
11247 ctxt->node_seq.length = 0;
11248 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011249 xmlFreeParserCtxt(ctxt);
11250 newDoc->intSubset = NULL;
11251 newDoc->extSubset = NULL;
11252 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011253 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011254 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011255 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011256 nodePush(ctxt, newDoc->children);
11257 if (doc == NULL) {
11258 ctxt->myDoc = newDoc;
11259 } else {
11260 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011261 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011262 }
11263
Daniel Veillard87a764e2001-06-20 17:41:10 +000011264 /*
11265 * Get the 4 first bytes and decode the charset
11266 * if enc != XML_CHAR_ENCODING_NONE
11267 * plug some encoding conversion routines.
11268 */
11269 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011270 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11271 start[0] = RAW;
11272 start[1] = NXT(1);
11273 start[2] = NXT(2);
11274 start[3] = NXT(3);
11275 enc = xmlDetectCharEncoding(start, 4);
11276 if (enc != XML_CHAR_ENCODING_NONE) {
11277 xmlSwitchEncoding(ctxt, enc);
11278 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011279 }
11280
Owen Taylor3473f882001-02-23 17:55:21 +000011281 /*
11282 * Parse a possible text declaration first
11283 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011284 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011285 xmlParseTextDecl(ctxt);
11286 }
11287
Owen Taylor3473f882001-02-23 17:55:21 +000011288 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011289 ctxt->depth = depth;
11290
11291 xmlParseContent(ctxt);
11292
Daniel Veillard561b7f82002-03-20 21:55:57 +000011293 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011294 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011295 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011296 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011297 }
11298 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011299 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011300 }
11301
11302 if (!ctxt->wellFormed) {
11303 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011304 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011305 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011306 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011307 } else {
11308 if (list != NULL) {
11309 xmlNodePtr cur;
11310
11311 /*
11312 * Return the newly created nodeset after unlinking it from
11313 * they pseudo parent.
11314 */
11315 cur = newDoc->children->children;
11316 *list = cur;
11317 while (cur != NULL) {
11318 cur->parent = NULL;
11319 cur = cur->next;
11320 }
11321 newDoc->children->children = NULL;
11322 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011323 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011324 }
11325 if (sax != NULL)
11326 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011327 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11328 oldctxt->node_seq.length = ctxt->node_seq.length;
11329 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011330 ctxt->node_seq.maximum = 0;
11331 ctxt->node_seq.length = 0;
11332 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011333 xmlFreeParserCtxt(ctxt);
11334 newDoc->intSubset = NULL;
11335 newDoc->extSubset = NULL;
11336 xmlFreeDoc(newDoc);
11337
11338 return(ret);
11339}
11340
Daniel Veillard81273902003-09-30 00:43:48 +000011341#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011342/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011343 * xmlParseExternalEntity:
11344 * @doc: the document the chunk pertains to
11345 * @sax: the SAX handler bloc (possibly NULL)
11346 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11347 * @depth: Used for loop detection, use 0
11348 * @URL: the URL for the entity to load
11349 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011350 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011351 *
11352 * Parse an external general entity
11353 * An external general parsed entity is well-formed if it matches the
11354 * production labeled extParsedEnt.
11355 *
11356 * [78] extParsedEnt ::= TextDecl? content
11357 *
11358 * Returns 0 if the entity is well formed, -1 in case of args problem and
11359 * the parser error code otherwise
11360 */
11361
11362int
11363xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011364 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011365 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011366 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011367}
11368
11369/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011370 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011371 * @doc: the document the chunk pertains to
11372 * @sax: the SAX handler bloc (possibly NULL)
11373 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11374 * @depth: Used for loop detection, use 0
11375 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011376 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011377 *
11378 * Parse a well-balanced chunk of an XML document
11379 * called by the parser
11380 * The allowed sequence for the Well Balanced Chunk is the one defined by
11381 * the content production in the XML grammar:
11382 *
11383 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11384 *
11385 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11386 * the parser error code otherwise
11387 */
11388
11389int
11390xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011391 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011392 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11393 depth, string, lst, 0 );
11394}
Daniel Veillard81273902003-09-30 00:43:48 +000011395#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011396
11397/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011398 * xmlParseBalancedChunkMemoryInternal:
11399 * @oldctxt: the existing parsing context
11400 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11401 * @user_data: the user data field for the parser context
11402 * @lst: the return value for the set of parsed nodes
11403 *
11404 *
11405 * Parse a well-balanced chunk of an XML document
11406 * called by the parser
11407 * The allowed sequence for the Well Balanced Chunk is the one defined by
11408 * the content production in the XML grammar:
11409 *
11410 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11411 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011412 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11413 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011414 *
11415 * In case recover is set to 1, the nodelist will not be empty even if
11416 * the parsed chunk is not well balanced.
11417 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011418static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011419xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11420 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11421 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011422 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011423 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011424 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011425 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011426 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011427 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011428 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011429
11430 if (oldctxt->depth > 40) {
11431 return(XML_ERR_ENTITY_LOOP);
11432 }
11433
11434
11435 if (lst != NULL)
11436 *lst = NULL;
11437 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011438 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011439
11440 size = xmlStrlen(string);
11441
11442 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011443 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011444 if (user_data != NULL)
11445 ctxt->userData = user_data;
11446 else
11447 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011448 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11449 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011450 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11451 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11452 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011453
11454 oldsax = ctxt->sax;
11455 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011456 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011457 ctxt->replaceEntities = oldctxt->replaceEntities;
11458 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011459
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011460 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011461 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011462 newDoc = xmlNewDoc(BAD_CAST "1.0");
11463 if (newDoc == NULL) {
11464 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011465 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011466 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011467 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011468 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011469 newDoc->dict = ctxt->dict;
11470 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011471 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011472 } else {
11473 ctxt->myDoc = oldctxt->myDoc;
11474 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011475 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011476 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011477 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11478 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011479 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011480 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011481 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011482 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011483 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011484 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011485 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011486 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011487 ctxt->myDoc->children = NULL;
11488 ctxt->myDoc->last = NULL;
11489 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011490 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011491 ctxt->instate = XML_PARSER_CONTENT;
11492 ctxt->depth = oldctxt->depth + 1;
11493
Daniel Veillard328f48c2002-11-15 15:24:34 +000011494 ctxt->validate = 0;
11495 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011496 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11497 /*
11498 * ID/IDREF registration will be done in xmlValidateElement below
11499 */
11500 ctxt->loadsubset |= XML_SKIP_IDS;
11501 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011502 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011503 ctxt->attsDefault = oldctxt->attsDefault;
11504 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011505
Daniel Veillard68e9e742002-11-16 15:35:11 +000011506 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011507 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011508 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011509 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011510 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011511 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011512 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011513 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011514 }
11515
11516 if (!ctxt->wellFormed) {
11517 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011518 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011519 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011520 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011521 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011522 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011523 }
11524
William M. Brack7b9154b2003-09-27 19:23:50 +000011525 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011526 xmlNodePtr cur;
11527
11528 /*
11529 * Return the newly created nodeset after unlinking it from
11530 * they pseudo parent.
11531 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011532 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011533 *lst = cur;
11534 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011535#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011536 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11537 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11538 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011539 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11540 oldctxt->myDoc, cur);
11541 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011542#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011543 cur->parent = NULL;
11544 cur = cur->next;
11545 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011546 ctxt->myDoc->children->children = NULL;
11547 }
11548 if (ctxt->myDoc != NULL) {
11549 xmlFreeNode(ctxt->myDoc->children);
11550 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011551 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011552 }
11553
11554 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011555 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011556 ctxt->attsDefault = NULL;
11557 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011558 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011559 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011560 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011561 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011562
11563 return(ret);
11564}
11565
Daniel Veillard29b17482004-08-16 00:39:03 +000011566/**
11567 * xmlParseInNodeContext:
11568 * @node: the context node
11569 * @data: the input string
11570 * @datalen: the input string length in bytes
11571 * @options: a combination of xmlParserOption
11572 * @lst: the return value for the set of parsed nodes
11573 *
11574 * Parse a well-balanced chunk of an XML document
11575 * within the context (DTD, namespaces, etc ...) of the given node.
11576 *
11577 * The allowed sequence for the data is a Well Balanced Chunk defined by
11578 * the content production in the XML grammar:
11579 *
11580 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11581 *
11582 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11583 * error code otherwise
11584 */
11585xmlParserErrors
11586xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11587 int options, xmlNodePtr *lst) {
11588#ifdef SAX2
11589 xmlParserCtxtPtr ctxt;
11590 xmlDocPtr doc = NULL;
11591 xmlNodePtr fake, cur;
11592 int nsnr = 0;
11593
11594 xmlParserErrors ret = XML_ERR_OK;
11595
11596 /*
11597 * check all input parameters, grab the document
11598 */
11599 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11600 return(XML_ERR_INTERNAL_ERROR);
11601 switch (node->type) {
11602 case XML_ELEMENT_NODE:
11603 case XML_ATTRIBUTE_NODE:
11604 case XML_TEXT_NODE:
11605 case XML_CDATA_SECTION_NODE:
11606 case XML_ENTITY_REF_NODE:
11607 case XML_PI_NODE:
11608 case XML_COMMENT_NODE:
11609 case XML_DOCUMENT_NODE:
11610 case XML_HTML_DOCUMENT_NODE:
11611 break;
11612 default:
11613 return(XML_ERR_INTERNAL_ERROR);
11614
11615 }
11616 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11617 (node->type != XML_DOCUMENT_NODE) &&
11618 (node->type != XML_HTML_DOCUMENT_NODE))
11619 node = node->parent;
11620 if (node == NULL)
11621 return(XML_ERR_INTERNAL_ERROR);
11622 if (node->type == XML_ELEMENT_NODE)
11623 doc = node->doc;
11624 else
11625 doc = (xmlDocPtr) node;
11626 if (doc == NULL)
11627 return(XML_ERR_INTERNAL_ERROR);
11628
11629 /*
11630 * allocate a context and set-up everything not related to the
11631 * node position in the tree
11632 */
11633 if (doc->type == XML_DOCUMENT_NODE)
11634 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11635#ifdef LIBXML_HTML_ENABLED
11636 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11637 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11638#endif
11639 else
11640 return(XML_ERR_INTERNAL_ERROR);
11641
11642 if (ctxt == NULL)
11643 return(XML_ERR_NO_MEMORY);
11644 fake = xmlNewComment(NULL);
11645 if (fake == NULL) {
11646 xmlFreeParserCtxt(ctxt);
11647 return(XML_ERR_NO_MEMORY);
11648 }
11649 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011650
11651 /*
11652 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11653 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11654 * we must wait until the last moment to free the original one.
11655 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011656 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011657 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011658 xmlDictFree(ctxt->dict);
11659 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011660 } else
11661 options |= XML_PARSE_NODICT;
11662
11663 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011664 xmlDetectSAX2(ctxt);
11665 ctxt->myDoc = doc;
11666
11667 if (node->type == XML_ELEMENT_NODE) {
11668 nodePush(ctxt, node);
11669 /*
11670 * initialize the SAX2 namespaces stack
11671 */
11672 cur = node;
11673 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11674 xmlNsPtr ns = cur->nsDef;
11675 const xmlChar *iprefix, *ihref;
11676
11677 while (ns != NULL) {
11678 if (ctxt->dict) {
11679 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11680 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11681 } else {
11682 iprefix = ns->prefix;
11683 ihref = ns->href;
11684 }
11685
11686 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11687 nsPush(ctxt, iprefix, ihref);
11688 nsnr++;
11689 }
11690 ns = ns->next;
11691 }
11692 cur = cur->parent;
11693 }
11694 ctxt->instate = XML_PARSER_CONTENT;
11695 }
11696
11697 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11698 /*
11699 * ID/IDREF registration will be done in xmlValidateElement below
11700 */
11701 ctxt->loadsubset |= XML_SKIP_IDS;
11702 }
11703
11704 xmlParseContent(ctxt);
11705 nsPop(ctxt, nsnr);
11706 if ((RAW == '<') && (NXT(1) == '/')) {
11707 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11708 } else if (RAW != 0) {
11709 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11710 }
11711 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11712 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11713 ctxt->wellFormed = 0;
11714 }
11715
11716 if (!ctxt->wellFormed) {
11717 if (ctxt->errNo == 0)
11718 ret = XML_ERR_INTERNAL_ERROR;
11719 else
11720 ret = (xmlParserErrors)ctxt->errNo;
11721 } else {
11722 ret = XML_ERR_OK;
11723 }
11724
11725 /*
11726 * Return the newly created nodeset after unlinking it from
11727 * the pseudo sibling.
11728 */
11729
11730 cur = fake->next;
11731 fake->next = NULL;
11732 node->last = fake;
11733
11734 if (cur != NULL) {
11735 cur->prev = NULL;
11736 }
11737
11738 *lst = cur;
11739
11740 while (cur != NULL) {
11741 cur->parent = NULL;
11742 cur = cur->next;
11743 }
11744
11745 xmlUnlinkNode(fake);
11746 xmlFreeNode(fake);
11747
11748
11749 if (ret != XML_ERR_OK) {
11750 xmlFreeNodeList(*lst);
11751 *lst = NULL;
11752 }
William M. Brackc3f81342004-10-03 01:22:44 +000011753
William M. Brackb7b54de2004-10-06 16:38:01 +000011754 if (doc->dict != NULL)
11755 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011756 xmlFreeParserCtxt(ctxt);
11757
11758 return(ret);
11759#else /* !SAX2 */
11760 return(XML_ERR_INTERNAL_ERROR);
11761#endif
11762}
11763
Daniel Veillard81273902003-09-30 00:43:48 +000011764#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011765/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011766 * xmlParseBalancedChunkMemoryRecover:
11767 * @doc: the document the chunk pertains to
11768 * @sax: the SAX handler bloc (possibly NULL)
11769 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11770 * @depth: Used for loop detection, use 0
11771 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11772 * @lst: the return value for the set of parsed nodes
11773 * @recover: return nodes even if the data is broken (use 0)
11774 *
11775 *
11776 * Parse a well-balanced chunk of an XML document
11777 * called by the parser
11778 * The allowed sequence for the Well Balanced Chunk is the one defined by
11779 * the content production in the XML grammar:
11780 *
11781 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11782 *
11783 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11784 * the parser error code otherwise
11785 *
11786 * In case recover is set to 1, the nodelist will not be empty even if
11787 * the parsed chunk is not well balanced.
11788 */
11789int
11790xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11791 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11792 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011793 xmlParserCtxtPtr ctxt;
11794 xmlDocPtr newDoc;
11795 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011796 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011797 int size;
11798 int ret = 0;
11799
11800 if (depth > 40) {
11801 return(XML_ERR_ENTITY_LOOP);
11802 }
11803
11804
Daniel Veillardcda96922001-08-21 10:56:31 +000011805 if (lst != NULL)
11806 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011807 if (string == NULL)
11808 return(-1);
11809
11810 size = xmlStrlen(string);
11811
11812 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11813 if (ctxt == NULL) return(-1);
11814 ctxt->userData = ctxt;
11815 if (sax != NULL) {
11816 oldsax = ctxt->sax;
11817 ctxt->sax = sax;
11818 if (user_data != NULL)
11819 ctxt->userData = user_data;
11820 }
11821 newDoc = xmlNewDoc(BAD_CAST "1.0");
11822 if (newDoc == NULL) {
11823 xmlFreeParserCtxt(ctxt);
11824 return(-1);
11825 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011826 if ((doc != NULL) && (doc->dict != NULL)) {
11827 xmlDictFree(ctxt->dict);
11828 ctxt->dict = doc->dict;
11829 xmlDictReference(ctxt->dict);
11830 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11831 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11832 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11833 ctxt->dictNames = 1;
11834 } else {
11835 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11836 }
Owen Taylor3473f882001-02-23 17:55:21 +000011837 if (doc != NULL) {
11838 newDoc->intSubset = doc->intSubset;
11839 newDoc->extSubset = doc->extSubset;
11840 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011841 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11842 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011843 if (sax != NULL)
11844 ctxt->sax = oldsax;
11845 xmlFreeParserCtxt(ctxt);
11846 newDoc->intSubset = NULL;
11847 newDoc->extSubset = NULL;
11848 xmlFreeDoc(newDoc);
11849 return(-1);
11850 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011851 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11852 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011853 if (doc == NULL) {
11854 ctxt->myDoc = newDoc;
11855 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011856 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011857 newDoc->children->doc = doc;
11858 }
11859 ctxt->instate = XML_PARSER_CONTENT;
11860 ctxt->depth = depth;
11861
11862 /*
11863 * Doing validity checking on chunk doesn't make sense
11864 */
11865 ctxt->validate = 0;
11866 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011867 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011868
Daniel Veillardb39bc392002-10-26 19:29:51 +000011869 if ( doc != NULL ){
11870 content = doc->children;
11871 doc->children = NULL;
11872 xmlParseContent(ctxt);
11873 doc->children = content;
11874 }
11875 else {
11876 xmlParseContent(ctxt);
11877 }
Owen Taylor3473f882001-02-23 17:55:21 +000011878 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011879 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011880 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011881 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011882 }
11883 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011884 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011885 }
11886
11887 if (!ctxt->wellFormed) {
11888 if (ctxt->errNo == 0)
11889 ret = 1;
11890 else
11891 ret = ctxt->errNo;
11892 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011893 ret = 0;
11894 }
11895
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011896 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11897 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011898
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011899 /*
11900 * Return the newly created nodeset after unlinking it from
11901 * they pseudo parent.
11902 */
11903 cur = newDoc->children->children;
11904 *lst = cur;
11905 while (cur != NULL) {
11906 xmlSetTreeDoc(cur, doc);
11907 cur->parent = NULL;
11908 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011909 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011910 newDoc->children->children = NULL;
11911 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011912
Owen Taylor3473f882001-02-23 17:55:21 +000011913 if (sax != NULL)
11914 ctxt->sax = oldsax;
11915 xmlFreeParserCtxt(ctxt);
11916 newDoc->intSubset = NULL;
11917 newDoc->extSubset = NULL;
11918 xmlFreeDoc(newDoc);
11919
11920 return(ret);
11921}
11922
11923/**
11924 * xmlSAXParseEntity:
11925 * @sax: the SAX handler block
11926 * @filename: the filename
11927 *
11928 * parse an XML external entity out of context and build a tree.
11929 * It use the given SAX function block to handle the parsing callback.
11930 * If sax is NULL, fallback to the default DOM tree building routines.
11931 *
11932 * [78] extParsedEnt ::= TextDecl? content
11933 *
11934 * This correspond to a "Well Balanced" chunk
11935 *
11936 * Returns the resulting document tree
11937 */
11938
11939xmlDocPtr
11940xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11941 xmlDocPtr ret;
11942 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011943
11944 ctxt = xmlCreateFileParserCtxt(filename);
11945 if (ctxt == NULL) {
11946 return(NULL);
11947 }
11948 if (sax != NULL) {
11949 if (ctxt->sax != NULL)
11950 xmlFree(ctxt->sax);
11951 ctxt->sax = sax;
11952 ctxt->userData = NULL;
11953 }
11954
Owen Taylor3473f882001-02-23 17:55:21 +000011955 xmlParseExtParsedEnt(ctxt);
11956
11957 if (ctxt->wellFormed)
11958 ret = ctxt->myDoc;
11959 else {
11960 ret = NULL;
11961 xmlFreeDoc(ctxt->myDoc);
11962 ctxt->myDoc = NULL;
11963 }
11964 if (sax != NULL)
11965 ctxt->sax = NULL;
11966 xmlFreeParserCtxt(ctxt);
11967
11968 return(ret);
11969}
11970
11971/**
11972 * xmlParseEntity:
11973 * @filename: the filename
11974 *
11975 * parse an XML external entity out of context and build a tree.
11976 *
11977 * [78] extParsedEnt ::= TextDecl? content
11978 *
11979 * This correspond to a "Well Balanced" chunk
11980 *
11981 * Returns the resulting document tree
11982 */
11983
11984xmlDocPtr
11985xmlParseEntity(const char *filename) {
11986 return(xmlSAXParseEntity(NULL, filename));
11987}
Daniel Veillard81273902003-09-30 00:43:48 +000011988#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011989
11990/**
11991 * xmlCreateEntityParserCtxt:
11992 * @URL: the entity URL
11993 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011994 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011995 *
11996 * Create a parser context for an external entity
11997 * Automatic support for ZLIB/Compress compressed document is provided
11998 * by default if found at compile-time.
11999 *
12000 * Returns the new parser context or NULL
12001 */
12002xmlParserCtxtPtr
12003xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12004 const xmlChar *base) {
12005 xmlParserCtxtPtr ctxt;
12006 xmlParserInputPtr inputStream;
12007 char *directory = NULL;
12008 xmlChar *uri;
12009
12010 ctxt = xmlNewParserCtxt();
12011 if (ctxt == NULL) {
12012 return(NULL);
12013 }
12014
12015 uri = xmlBuildURI(URL, base);
12016
12017 if (uri == NULL) {
12018 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12019 if (inputStream == NULL) {
12020 xmlFreeParserCtxt(ctxt);
12021 return(NULL);
12022 }
12023
12024 inputPush(ctxt, inputStream);
12025
12026 if ((ctxt->directory == NULL) && (directory == NULL))
12027 directory = xmlParserGetDirectory((char *)URL);
12028 if ((ctxt->directory == NULL) && (directory != NULL))
12029 ctxt->directory = directory;
12030 } else {
12031 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12032 if (inputStream == NULL) {
12033 xmlFree(uri);
12034 xmlFreeParserCtxt(ctxt);
12035 return(NULL);
12036 }
12037
12038 inputPush(ctxt, inputStream);
12039
12040 if ((ctxt->directory == NULL) && (directory == NULL))
12041 directory = xmlParserGetDirectory((char *)uri);
12042 if ((ctxt->directory == NULL) && (directory != NULL))
12043 ctxt->directory = directory;
12044 xmlFree(uri);
12045 }
Owen Taylor3473f882001-02-23 17:55:21 +000012046 return(ctxt);
12047}
12048
12049/************************************************************************
12050 * *
12051 * Front ends when parsing from a file *
12052 * *
12053 ************************************************************************/
12054
12055/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012056 * xmlCreateURLParserCtxt:
12057 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012058 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012059 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012060 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012061 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012062 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012063 *
12064 * Returns the new parser context or NULL
12065 */
12066xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012067xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012068{
12069 xmlParserCtxtPtr ctxt;
12070 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012071 char *directory = NULL;
12072
Owen Taylor3473f882001-02-23 17:55:21 +000012073 ctxt = xmlNewParserCtxt();
12074 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012075 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012076 return(NULL);
12077 }
12078
Daniel Veillarddf292f72005-01-16 19:00:15 +000012079 if (options)
12080 xmlCtxtUseOptions(ctxt, options);
12081 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012082
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012083 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012084 if (inputStream == NULL) {
12085 xmlFreeParserCtxt(ctxt);
12086 return(NULL);
12087 }
12088
Owen Taylor3473f882001-02-23 17:55:21 +000012089 inputPush(ctxt, inputStream);
12090 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012091 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012092 if ((ctxt->directory == NULL) && (directory != NULL))
12093 ctxt->directory = directory;
12094
12095 return(ctxt);
12096}
12097
Daniel Veillard61b93382003-11-03 14:28:31 +000012098/**
12099 * xmlCreateFileParserCtxt:
12100 * @filename: the filename
12101 *
12102 * Create a parser context for a file content.
12103 * Automatic support for ZLIB/Compress compressed document is provided
12104 * by default if found at compile-time.
12105 *
12106 * Returns the new parser context or NULL
12107 */
12108xmlParserCtxtPtr
12109xmlCreateFileParserCtxt(const char *filename)
12110{
12111 return(xmlCreateURLParserCtxt(filename, 0));
12112}
12113
Daniel Veillard81273902003-09-30 00:43:48 +000012114#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012115/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012116 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012117 * @sax: the SAX handler block
12118 * @filename: the filename
12119 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12120 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012121 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012122 *
12123 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12124 * compressed document is provided by default if found at compile-time.
12125 * It use the given SAX function block to handle the parsing callback.
12126 * If sax is NULL, fallback to the default DOM tree building routines.
12127 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012128 * User data (void *) is stored within the parser context in the
12129 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012130 *
Owen Taylor3473f882001-02-23 17:55:21 +000012131 * Returns the resulting document tree
12132 */
12133
12134xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012135xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12136 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012137 xmlDocPtr ret;
12138 xmlParserCtxtPtr ctxt;
12139 char *directory = NULL;
12140
Daniel Veillard635ef722001-10-29 11:48:19 +000012141 xmlInitParser();
12142
Owen Taylor3473f882001-02-23 17:55:21 +000012143 ctxt = xmlCreateFileParserCtxt(filename);
12144 if (ctxt == NULL) {
12145 return(NULL);
12146 }
12147 if (sax != NULL) {
12148 if (ctxt->sax != NULL)
12149 xmlFree(ctxt->sax);
12150 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012151 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012152 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012153 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012154 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012155 }
Owen Taylor3473f882001-02-23 17:55:21 +000012156
12157 if ((ctxt->directory == NULL) && (directory == NULL))
12158 directory = xmlParserGetDirectory(filename);
12159 if ((ctxt->directory == NULL) && (directory != NULL))
12160 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12161
Daniel Veillarddad3f682002-11-17 16:47:27 +000012162 ctxt->recovery = recovery;
12163
Owen Taylor3473f882001-02-23 17:55:21 +000012164 xmlParseDocument(ctxt);
12165
William M. Brackc07329e2003-09-08 01:57:30 +000012166 if ((ctxt->wellFormed) || recovery) {
12167 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012168 if (ret != NULL) {
12169 if (ctxt->input->buf->compressed > 0)
12170 ret->compression = 9;
12171 else
12172 ret->compression = ctxt->input->buf->compressed;
12173 }
William M. Brackc07329e2003-09-08 01:57:30 +000012174 }
Owen Taylor3473f882001-02-23 17:55:21 +000012175 else {
12176 ret = NULL;
12177 xmlFreeDoc(ctxt->myDoc);
12178 ctxt->myDoc = NULL;
12179 }
12180 if (sax != NULL)
12181 ctxt->sax = NULL;
12182 xmlFreeParserCtxt(ctxt);
12183
12184 return(ret);
12185}
12186
12187/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012188 * xmlSAXParseFile:
12189 * @sax: the SAX handler block
12190 * @filename: the filename
12191 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12192 * documents
12193 *
12194 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12195 * compressed document is provided by default if found at compile-time.
12196 * It use the given SAX function block to handle the parsing callback.
12197 * If sax is NULL, fallback to the default DOM tree building routines.
12198 *
12199 * Returns the resulting document tree
12200 */
12201
12202xmlDocPtr
12203xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12204 int recovery) {
12205 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12206}
12207
12208/**
Owen Taylor3473f882001-02-23 17:55:21 +000012209 * xmlRecoverDoc:
12210 * @cur: a pointer to an array of xmlChar
12211 *
12212 * parse an XML in-memory document and build a tree.
12213 * In the case the document is not Well Formed, a tree is built anyway
12214 *
12215 * Returns the resulting document tree
12216 */
12217
12218xmlDocPtr
12219xmlRecoverDoc(xmlChar *cur) {
12220 return(xmlSAXParseDoc(NULL, cur, 1));
12221}
12222
12223/**
12224 * xmlParseFile:
12225 * @filename: the filename
12226 *
12227 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12228 * compressed document is provided by default if found at compile-time.
12229 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012230 * Returns the resulting document tree if the file was wellformed,
12231 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012232 */
12233
12234xmlDocPtr
12235xmlParseFile(const char *filename) {
12236 return(xmlSAXParseFile(NULL, filename, 0));
12237}
12238
12239/**
12240 * xmlRecoverFile:
12241 * @filename: the filename
12242 *
12243 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12244 * compressed document is provided by default if found at compile-time.
12245 * In the case the document is not Well Formed, a tree is built anyway
12246 *
12247 * Returns the resulting document tree
12248 */
12249
12250xmlDocPtr
12251xmlRecoverFile(const char *filename) {
12252 return(xmlSAXParseFile(NULL, filename, 1));
12253}
12254
12255
12256/**
12257 * xmlSetupParserForBuffer:
12258 * @ctxt: an XML parser context
12259 * @buffer: a xmlChar * buffer
12260 * @filename: a file name
12261 *
12262 * Setup the parser context to parse a new buffer; Clears any prior
12263 * contents from the parser context. The buffer parameter must not be
12264 * NULL, but the filename parameter can be
12265 */
12266void
12267xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12268 const char* filename)
12269{
12270 xmlParserInputPtr input;
12271
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012272 if ((ctxt == NULL) || (buffer == NULL))
12273 return;
12274
Owen Taylor3473f882001-02-23 17:55:21 +000012275 input = xmlNewInputStream(ctxt);
12276 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012277 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012278 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012279 return;
12280 }
12281
12282 xmlClearParserCtxt(ctxt);
12283 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012284 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012285 input->base = buffer;
12286 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012287 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012288 inputPush(ctxt, input);
12289}
12290
12291/**
12292 * xmlSAXUserParseFile:
12293 * @sax: a SAX handler
12294 * @user_data: The user data returned on SAX callbacks
12295 * @filename: a file name
12296 *
12297 * parse an XML file and call the given SAX handler routines.
12298 * Automatic support for ZLIB/Compress compressed document is provided
12299 *
12300 * Returns 0 in case of success or a error number otherwise
12301 */
12302int
12303xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12304 const char *filename) {
12305 int ret = 0;
12306 xmlParserCtxtPtr ctxt;
12307
12308 ctxt = xmlCreateFileParserCtxt(filename);
12309 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012310#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012311 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012312#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012313 xmlFree(ctxt->sax);
12314 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012315 xmlDetectSAX2(ctxt);
12316
Owen Taylor3473f882001-02-23 17:55:21 +000012317 if (user_data != NULL)
12318 ctxt->userData = user_data;
12319
12320 xmlParseDocument(ctxt);
12321
12322 if (ctxt->wellFormed)
12323 ret = 0;
12324 else {
12325 if (ctxt->errNo != 0)
12326 ret = ctxt->errNo;
12327 else
12328 ret = -1;
12329 }
12330 if (sax != NULL)
12331 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012332 if (ctxt->myDoc != NULL) {
12333 xmlFreeDoc(ctxt->myDoc);
12334 ctxt->myDoc = NULL;
12335 }
Owen Taylor3473f882001-02-23 17:55:21 +000012336 xmlFreeParserCtxt(ctxt);
12337
12338 return ret;
12339}
Daniel Veillard81273902003-09-30 00:43:48 +000012340#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012341
12342/************************************************************************
12343 * *
12344 * Front ends when parsing from memory *
12345 * *
12346 ************************************************************************/
12347
12348/**
12349 * xmlCreateMemoryParserCtxt:
12350 * @buffer: a pointer to a char array
12351 * @size: the size of the array
12352 *
12353 * Create a parser context for an XML in-memory document.
12354 *
12355 * Returns the new parser context or NULL
12356 */
12357xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012358xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012359 xmlParserCtxtPtr ctxt;
12360 xmlParserInputPtr input;
12361 xmlParserInputBufferPtr buf;
12362
12363 if (buffer == NULL)
12364 return(NULL);
12365 if (size <= 0)
12366 return(NULL);
12367
12368 ctxt = xmlNewParserCtxt();
12369 if (ctxt == NULL)
12370 return(NULL);
12371
Daniel Veillard53350552003-09-18 13:35:51 +000012372 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012373 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012374 if (buf == NULL) {
12375 xmlFreeParserCtxt(ctxt);
12376 return(NULL);
12377 }
Owen Taylor3473f882001-02-23 17:55:21 +000012378
12379 input = xmlNewInputStream(ctxt);
12380 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012381 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012382 xmlFreeParserCtxt(ctxt);
12383 return(NULL);
12384 }
12385
12386 input->filename = NULL;
12387 input->buf = buf;
12388 input->base = input->buf->buffer->content;
12389 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012390 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012391
12392 inputPush(ctxt, input);
12393 return(ctxt);
12394}
12395
Daniel Veillard81273902003-09-30 00:43:48 +000012396#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012397/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012398 * xmlSAXParseMemoryWithData:
12399 * @sax: the SAX handler block
12400 * @buffer: an pointer to a char array
12401 * @size: the size of the array
12402 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12403 * documents
12404 * @data: the userdata
12405 *
12406 * parse an XML in-memory block and use the given SAX function block
12407 * to handle the parsing callback. If sax is NULL, fallback to the default
12408 * DOM tree building routines.
12409 *
12410 * User data (void *) is stored within the parser context in the
12411 * context's _private member, so it is available nearly everywhere in libxml
12412 *
12413 * Returns the resulting document tree
12414 */
12415
12416xmlDocPtr
12417xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12418 int size, int recovery, void *data) {
12419 xmlDocPtr ret;
12420 xmlParserCtxtPtr ctxt;
12421
12422 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12423 if (ctxt == NULL) return(NULL);
12424 if (sax != NULL) {
12425 if (ctxt->sax != NULL)
12426 xmlFree(ctxt->sax);
12427 ctxt->sax = sax;
12428 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012429 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012430 if (data!=NULL) {
12431 ctxt->_private=data;
12432 }
12433
Daniel Veillardadba5f12003-04-04 16:09:01 +000012434 ctxt->recovery = recovery;
12435
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012436 xmlParseDocument(ctxt);
12437
12438 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12439 else {
12440 ret = NULL;
12441 xmlFreeDoc(ctxt->myDoc);
12442 ctxt->myDoc = NULL;
12443 }
12444 if (sax != NULL)
12445 ctxt->sax = NULL;
12446 xmlFreeParserCtxt(ctxt);
12447
12448 return(ret);
12449}
12450
12451/**
Owen Taylor3473f882001-02-23 17:55:21 +000012452 * xmlSAXParseMemory:
12453 * @sax: the SAX handler block
12454 * @buffer: an pointer to a char array
12455 * @size: the size of the array
12456 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12457 * documents
12458 *
12459 * parse an XML in-memory block and use the given SAX function block
12460 * to handle the parsing callback. If sax is NULL, fallback to the default
12461 * DOM tree building routines.
12462 *
12463 * Returns the resulting document tree
12464 */
12465xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012466xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12467 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012468 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012469}
12470
12471/**
12472 * xmlParseMemory:
12473 * @buffer: an pointer to a char array
12474 * @size: the size of the array
12475 *
12476 * parse an XML in-memory block and build a tree.
12477 *
12478 * Returns the resulting document tree
12479 */
12480
Daniel Veillard50822cb2001-07-26 20:05:51 +000012481xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012482 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12483}
12484
12485/**
12486 * xmlRecoverMemory:
12487 * @buffer: an pointer to a char array
12488 * @size: the size of the array
12489 *
12490 * parse an XML in-memory block and build a tree.
12491 * In the case the document is not Well Formed, a tree is built anyway
12492 *
12493 * Returns the resulting document tree
12494 */
12495
Daniel Veillard50822cb2001-07-26 20:05:51 +000012496xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012497 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12498}
12499
12500/**
12501 * xmlSAXUserParseMemory:
12502 * @sax: a SAX handler
12503 * @user_data: The user data returned on SAX callbacks
12504 * @buffer: an in-memory XML document input
12505 * @size: the length of the XML document in bytes
12506 *
12507 * A better SAX parsing routine.
12508 * parse an XML in-memory buffer and call the given SAX handler routines.
12509 *
12510 * Returns 0 in case of success or a error number otherwise
12511 */
12512int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012513 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012514 int ret = 0;
12515 xmlParserCtxtPtr ctxt;
12516 xmlSAXHandlerPtr oldsax = NULL;
12517
Daniel Veillard9e923512002-08-14 08:48:52 +000012518 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012519 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12520 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012521 oldsax = ctxt->sax;
12522 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012523 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012524 if (user_data != NULL)
12525 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012526
12527 xmlParseDocument(ctxt);
12528
12529 if (ctxt->wellFormed)
12530 ret = 0;
12531 else {
12532 if (ctxt->errNo != 0)
12533 ret = ctxt->errNo;
12534 else
12535 ret = -1;
12536 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012537 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012538 if (ctxt->myDoc != NULL) {
12539 xmlFreeDoc(ctxt->myDoc);
12540 ctxt->myDoc = NULL;
12541 }
Owen Taylor3473f882001-02-23 17:55:21 +000012542 xmlFreeParserCtxt(ctxt);
12543
12544 return ret;
12545}
Daniel Veillard81273902003-09-30 00:43:48 +000012546#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012547
12548/**
12549 * xmlCreateDocParserCtxt:
12550 * @cur: a pointer to an array of xmlChar
12551 *
12552 * Creates a parser context for an XML in-memory document.
12553 *
12554 * Returns the new parser context or NULL
12555 */
12556xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012557xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012558 int len;
12559
12560 if (cur == NULL)
12561 return(NULL);
12562 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012563 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012564}
12565
Daniel Veillard81273902003-09-30 00:43:48 +000012566#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012567/**
12568 * xmlSAXParseDoc:
12569 * @sax: the SAX handler block
12570 * @cur: a pointer to an array of xmlChar
12571 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12572 * documents
12573 *
12574 * parse an XML in-memory document and build a tree.
12575 * It use the given SAX function block to handle the parsing callback.
12576 * If sax is NULL, fallback to the default DOM tree building routines.
12577 *
12578 * Returns the resulting document tree
12579 */
12580
12581xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012582xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012583 xmlDocPtr ret;
12584 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012585 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012586
Daniel Veillard38936062004-11-04 17:45:11 +000012587 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012588
12589
12590 ctxt = xmlCreateDocParserCtxt(cur);
12591 if (ctxt == NULL) return(NULL);
12592 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012593 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012594 ctxt->sax = sax;
12595 ctxt->userData = NULL;
12596 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012597 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012598
12599 xmlParseDocument(ctxt);
12600 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12601 else {
12602 ret = NULL;
12603 xmlFreeDoc(ctxt->myDoc);
12604 ctxt->myDoc = NULL;
12605 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012606 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012607 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012608 xmlFreeParserCtxt(ctxt);
12609
12610 return(ret);
12611}
12612
12613/**
12614 * xmlParseDoc:
12615 * @cur: a pointer to an array of xmlChar
12616 *
12617 * parse an XML in-memory document and build a tree.
12618 *
12619 * Returns the resulting document tree
12620 */
12621
12622xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012623xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012624 return(xmlSAXParseDoc(NULL, cur, 0));
12625}
Daniel Veillard81273902003-09-30 00:43:48 +000012626#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012627
Daniel Veillard81273902003-09-30 00:43:48 +000012628#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012629/************************************************************************
12630 * *
12631 * Specific function to keep track of entities references *
12632 * and used by the XSLT debugger *
12633 * *
12634 ************************************************************************/
12635
12636static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12637
12638/**
12639 * xmlAddEntityReference:
12640 * @ent : A valid entity
12641 * @firstNode : A valid first node for children of entity
12642 * @lastNode : A valid last node of children entity
12643 *
12644 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12645 */
12646static void
12647xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12648 xmlNodePtr lastNode)
12649{
12650 if (xmlEntityRefFunc != NULL) {
12651 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12652 }
12653}
12654
12655
12656/**
12657 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012658 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012659 *
12660 * Set the function to call call back when a xml reference has been made
12661 */
12662void
12663xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12664{
12665 xmlEntityRefFunc = func;
12666}
Daniel Veillard81273902003-09-30 00:43:48 +000012667#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012668
12669/************************************************************************
12670 * *
12671 * Miscellaneous *
12672 * *
12673 ************************************************************************/
12674
12675#ifdef LIBXML_XPATH_ENABLED
12676#include <libxml/xpath.h>
12677#endif
12678
Daniel Veillardffa3c742005-07-21 13:24:09 +000012679extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012680static int xmlParserInitialized = 0;
12681
12682/**
12683 * xmlInitParser:
12684 *
12685 * Initialization function for the XML parser.
12686 * This is not reentrant. Call once before processing in case of
12687 * use in multithreaded programs.
12688 */
12689
12690void
12691xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012692 if (xmlParserInitialized != 0)
12693 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012694
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012695 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12696 (xmlGenericError == NULL))
12697 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012698 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012699 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012700 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012701 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012702 xmlDefaultSAXHandlerInit();
12703 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012704#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012705 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012706#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012707#ifdef LIBXML_HTML_ENABLED
12708 htmlInitAutoClose();
12709 htmlDefaultSAXHandlerInit();
12710#endif
12711#ifdef LIBXML_XPATH_ENABLED
12712 xmlXPathInit();
12713#endif
12714 xmlParserInitialized = 1;
12715}
12716
12717/**
12718 * xmlCleanupParser:
12719 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012720 * Cleanup function for the XML library. It tries to reclaim all
12721 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012722 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012723 * function should not prevent reusing the library but one should
12724 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012725 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012726 */
12727
12728void
12729xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012730 if (!xmlParserInitialized)
12731 return;
12732
Owen Taylor3473f882001-02-23 17:55:21 +000012733 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012734#ifdef LIBXML_CATALOG_ENABLED
12735 xmlCatalogCleanup();
12736#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012737 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012738 xmlCleanupInputCallbacks();
12739#ifdef LIBXML_OUTPUT_ENABLED
12740 xmlCleanupOutputCallbacks();
12741#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012742#ifdef LIBXML_SCHEMAS_ENABLED
12743 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012744 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012745#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012746 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012747 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012748 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012749 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012750 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012751}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012752
12753/************************************************************************
12754 * *
12755 * New set (2.6.0) of simpler and more flexible APIs *
12756 * *
12757 ************************************************************************/
12758
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.  A variable named "dict" must be visible where the
 * macro is used; @str is evaluated more than once, so it must not have
 * side effects.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is a
 * single statement and can be used safely inside if/else constructs.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
12770
12771/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012772 * xmlCtxtReset:
12773 * @ctxt: an XML parser context
12774 *
12775 * Reset a parser context
12776 */
12777void
12778xmlCtxtReset(xmlParserCtxtPtr ctxt)
12779{
12780 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012781 xmlDictPtr dict;
12782
12783 if (ctxt == NULL)
12784 return;
12785
12786 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012787
12788 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12789 xmlFreeInputStream(input);
12790 }
12791 ctxt->inputNr = 0;
12792 ctxt->input = NULL;
12793
12794 ctxt->spaceNr = 0;
12795 ctxt->spaceTab[0] = -1;
12796 ctxt->space = &ctxt->spaceTab[0];
12797
12798
12799 ctxt->nodeNr = 0;
12800 ctxt->node = NULL;
12801
12802 ctxt->nameNr = 0;
12803 ctxt->name = NULL;
12804
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012805 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012806 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012807 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012808 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012809 DICT_FREE(ctxt->directory);
12810 ctxt->directory = NULL;
12811 DICT_FREE(ctxt->extSubURI);
12812 ctxt->extSubURI = NULL;
12813 DICT_FREE(ctxt->extSubSystem);
12814 ctxt->extSubSystem = NULL;
12815 if (ctxt->myDoc != NULL)
12816 xmlFreeDoc(ctxt->myDoc);
12817 ctxt->myDoc = NULL;
12818
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012819 ctxt->standalone = -1;
12820 ctxt->hasExternalSubset = 0;
12821 ctxt->hasPErefs = 0;
12822 ctxt->html = 0;
12823 ctxt->external = 0;
12824 ctxt->instate = XML_PARSER_START;
12825 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012826
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012827 ctxt->wellFormed = 1;
12828 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012829 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012830 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012831#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012832 ctxt->vctxt.userData = ctxt;
12833 ctxt->vctxt.error = xmlParserValidityError;
12834 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012835#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012836 ctxt->record_info = 0;
12837 ctxt->nbChars = 0;
12838 ctxt->checkIndex = 0;
12839 ctxt->inSubset = 0;
12840 ctxt->errNo = XML_ERR_OK;
12841 ctxt->depth = 0;
12842 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12843 ctxt->catalogs = NULL;
12844 xmlInitNodeInfoSeq(&ctxt->node_seq);
12845
12846 if (ctxt->attsDefault != NULL) {
12847 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12848 ctxt->attsDefault = NULL;
12849 }
12850 if (ctxt->attsSpecial != NULL) {
12851 xmlHashFree(ctxt->attsSpecial, NULL);
12852 ctxt->attsSpecial = NULL;
12853 }
12854
Daniel Veillard4432df22003-09-28 18:58:27 +000012855#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012856 if (ctxt->catalogs != NULL)
12857 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012858#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012859 if (ctxt->lastError.code != XML_ERR_OK)
12860 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012861}
12862
12863/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012864 * xmlCtxtResetPush:
12865 * @ctxt: an XML parser context
12866 * @chunk: a pointer to an array of chars
12867 * @size: number of chars in the array
12868 * @filename: an optional file name or URI
12869 * @encoding: the document encoding, or NULL
12870 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012871 * Reset a push parser context
12872 *
12873 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012874 */
12875int
12876xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12877 int size, const char *filename, const char *encoding)
12878{
12879 xmlParserInputPtr inputStream;
12880 xmlParserInputBufferPtr buf;
12881 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12882
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012883 if (ctxt == NULL)
12884 return(1);
12885
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012886 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12887 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12888
12889 buf = xmlAllocParserInputBuffer(enc);
12890 if (buf == NULL)
12891 return(1);
12892
12893 if (ctxt == NULL) {
12894 xmlFreeParserInputBuffer(buf);
12895 return(1);
12896 }
12897
12898 xmlCtxtReset(ctxt);
12899
12900 if (ctxt->pushTab == NULL) {
12901 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12902 sizeof(xmlChar *));
12903 if (ctxt->pushTab == NULL) {
12904 xmlErrMemory(ctxt, NULL);
12905 xmlFreeParserInputBuffer(buf);
12906 return(1);
12907 }
12908 }
12909
12910 if (filename == NULL) {
12911 ctxt->directory = NULL;
12912 } else {
12913 ctxt->directory = xmlParserGetDirectory(filename);
12914 }
12915
12916 inputStream = xmlNewInputStream(ctxt);
12917 if (inputStream == NULL) {
12918 xmlFreeParserInputBuffer(buf);
12919 return(1);
12920 }
12921
12922 if (filename == NULL)
12923 inputStream->filename = NULL;
12924 else
12925 inputStream->filename = (char *)
12926 xmlCanonicPath((const xmlChar *) filename);
12927 inputStream->buf = buf;
12928 inputStream->base = inputStream->buf->buffer->content;
12929 inputStream->cur = inputStream->buf->buffer->content;
12930 inputStream->end =
12931 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12932
12933 inputPush(ctxt, inputStream);
12934
12935 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12936 (ctxt->input->buf != NULL)) {
12937 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12938 int cur = ctxt->input->cur - ctxt->input->base;
12939
12940 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12941
12942 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12943 ctxt->input->cur = ctxt->input->base + cur;
12944 ctxt->input->end =
12945 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12946 use];
12947#ifdef DEBUG_PUSH
12948 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12949#endif
12950 }
12951
12952 if (encoding != NULL) {
12953 xmlCharEncodingHandlerPtr hdlr;
12954
12955 hdlr = xmlFindCharEncodingHandler(encoding);
12956 if (hdlr != NULL) {
12957 xmlSwitchToEncoding(ctxt, hdlr);
12958 } else {
12959 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12960 "Unsupported encoding %s\n", BAD_CAST encoding);
12961 }
12962 } else if (enc != XML_CHAR_ENCODING_NONE) {
12963 xmlSwitchEncoding(ctxt, enc);
12964 }
12965
12966 return(0);
12967}
12968
12969/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012970 * xmlCtxtUseOptions:
12971 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012972 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012973 *
12974 * Applies the options to the parser context
12975 *
12976 * Returns 0 in case of success, the set of unknown or unimplemented options
12977 * in case of error.
12978 */
12979int
12980xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12981{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012982 if (ctxt == NULL)
12983 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012984 if (options & XML_PARSE_RECOVER) {
12985 ctxt->recovery = 1;
12986 options -= XML_PARSE_RECOVER;
12987 } else
12988 ctxt->recovery = 0;
12989 if (options & XML_PARSE_DTDLOAD) {
12990 ctxt->loadsubset = XML_DETECT_IDS;
12991 options -= XML_PARSE_DTDLOAD;
12992 } else
12993 ctxt->loadsubset = 0;
12994 if (options & XML_PARSE_DTDATTR) {
12995 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12996 options -= XML_PARSE_DTDATTR;
12997 }
12998 if (options & XML_PARSE_NOENT) {
12999 ctxt->replaceEntities = 1;
13000 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13001 options -= XML_PARSE_NOENT;
13002 } else
13003 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013004 if (options & XML_PARSE_PEDANTIC) {
13005 ctxt->pedantic = 1;
13006 options -= XML_PARSE_PEDANTIC;
13007 } else
13008 ctxt->pedantic = 0;
13009 if (options & XML_PARSE_NOBLANKS) {
13010 ctxt->keepBlanks = 0;
13011 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13012 options -= XML_PARSE_NOBLANKS;
13013 } else
13014 ctxt->keepBlanks = 1;
13015 if (options & XML_PARSE_DTDVALID) {
13016 ctxt->validate = 1;
13017 if (options & XML_PARSE_NOWARNING)
13018 ctxt->vctxt.warning = NULL;
13019 if (options & XML_PARSE_NOERROR)
13020 ctxt->vctxt.error = NULL;
13021 options -= XML_PARSE_DTDVALID;
13022 } else
13023 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013024 if (options & XML_PARSE_NOWARNING) {
13025 ctxt->sax->warning = NULL;
13026 options -= XML_PARSE_NOWARNING;
13027 }
13028 if (options & XML_PARSE_NOERROR) {
13029 ctxt->sax->error = NULL;
13030 ctxt->sax->fatalError = NULL;
13031 options -= XML_PARSE_NOERROR;
13032 }
Daniel Veillard81273902003-09-30 00:43:48 +000013033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013034 if (options & XML_PARSE_SAX1) {
13035 ctxt->sax->startElement = xmlSAX2StartElement;
13036 ctxt->sax->endElement = xmlSAX2EndElement;
13037 ctxt->sax->startElementNs = NULL;
13038 ctxt->sax->endElementNs = NULL;
13039 ctxt->sax->initialized = 1;
13040 options -= XML_PARSE_SAX1;
13041 }
Daniel Veillard81273902003-09-30 00:43:48 +000013042#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013043 if (options & XML_PARSE_NODICT) {
13044 ctxt->dictNames = 0;
13045 options -= XML_PARSE_NODICT;
13046 } else {
13047 ctxt->dictNames = 1;
13048 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013049 if (options & XML_PARSE_NOCDATA) {
13050 ctxt->sax->cdataBlock = NULL;
13051 options -= XML_PARSE_NOCDATA;
13052 }
13053 if (options & XML_PARSE_NSCLEAN) {
13054 ctxt->options |= XML_PARSE_NSCLEAN;
13055 options -= XML_PARSE_NSCLEAN;
13056 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013057 if (options & XML_PARSE_NONET) {
13058 ctxt->options |= XML_PARSE_NONET;
13059 options -= XML_PARSE_NONET;
13060 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013061 if (options & XML_PARSE_COMPACT) {
13062 ctxt->options |= XML_PARSE_COMPACT;
13063 options -= XML_PARSE_COMPACT;
13064 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013065 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013066 return (options);
13067}
13068
13069/**
13070 * xmlDoRead:
13071 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013072 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013073 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013074 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013075 * @reuse: keep the context for reuse
13076 *
13077 * Common front-end for the xmlRead functions
13078 *
13079 * Returns the resulting document tree or NULL
13080 */
13081static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013082xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13083 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013084{
13085 xmlDocPtr ret;
13086
13087 xmlCtxtUseOptions(ctxt, options);
13088 if (encoding != NULL) {
13089 xmlCharEncodingHandlerPtr hdlr;
13090
13091 hdlr = xmlFindCharEncodingHandler(encoding);
13092 if (hdlr != NULL)
13093 xmlSwitchToEncoding(ctxt, hdlr);
13094 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013095 if ((URL != NULL) && (ctxt->input != NULL) &&
13096 (ctxt->input->filename == NULL))
13097 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013098 xmlParseDocument(ctxt);
13099 if ((ctxt->wellFormed) || ctxt->recovery)
13100 ret = ctxt->myDoc;
13101 else {
13102 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013103 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013104 xmlFreeDoc(ctxt->myDoc);
13105 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013106 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013107 ctxt->myDoc = NULL;
13108 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013109 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013110 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013111
13112 return (ret);
13113}
13114
13115/**
13116 * xmlReadDoc:
13117 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013118 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013119 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013120 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013121 *
13122 * parse an XML in-memory document and build a tree.
13123 *
13124 * Returns the resulting document tree
13125 */
13126xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013127xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013128{
13129 xmlParserCtxtPtr ctxt;
13130
13131 if (cur == NULL)
13132 return (NULL);
13133
13134 ctxt = xmlCreateDocParserCtxt(cur);
13135 if (ctxt == NULL)
13136 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013137 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013138}
13139
13140/**
13141 * xmlReadFile:
13142 * @filename: a file or URL
13143 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013144 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013145 *
13146 * parse an XML file from the filesystem or the network.
13147 *
13148 * Returns the resulting document tree
13149 */
13150xmlDocPtr
13151xmlReadFile(const char *filename, const char *encoding, int options)
13152{
13153 xmlParserCtxtPtr ctxt;
13154
Daniel Veillard61b93382003-11-03 14:28:31 +000013155 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013156 if (ctxt == NULL)
13157 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013158 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013159}
13160
13161/**
13162 * xmlReadMemory:
13163 * @buffer: a pointer to a char array
13164 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013165 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013166 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013167 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013168 *
13169 * parse an XML in-memory document and build a tree.
13170 *
13171 * Returns the resulting document tree
13172 */
13173xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013174xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013175{
13176 xmlParserCtxtPtr ctxt;
13177
13178 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13179 if (ctxt == NULL)
13180 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013181 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013182}
13183
13184/**
13185 * xmlReadFd:
13186 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013187 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013188 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013189 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013190 *
13191 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013192 * NOTE that the file descriptor will not be closed when the
13193 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013194 *
13195 * Returns the resulting document tree
13196 */
13197xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013198xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013199{
13200 xmlParserCtxtPtr ctxt;
13201 xmlParserInputBufferPtr input;
13202 xmlParserInputPtr stream;
13203
13204 if (fd < 0)
13205 return (NULL);
13206
13207 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13208 if (input == NULL)
13209 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013210 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013211 ctxt = xmlNewParserCtxt();
13212 if (ctxt == NULL) {
13213 xmlFreeParserInputBuffer(input);
13214 return (NULL);
13215 }
13216 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13217 if (stream == NULL) {
13218 xmlFreeParserInputBuffer(input);
13219 xmlFreeParserCtxt(ctxt);
13220 return (NULL);
13221 }
13222 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013223 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013224}
13225
13226/**
13227 * xmlReadIO:
13228 * @ioread: an I/O read function
13229 * @ioclose: an I/O close function
13230 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013231 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013232 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013233 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013234 *
13235 * parse an XML document from I/O functions and source and build a tree.
13236 *
13237 * Returns the resulting document tree
13238 */
13239xmlDocPtr
13240xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013241 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013242{
13243 xmlParserCtxtPtr ctxt;
13244 xmlParserInputBufferPtr input;
13245 xmlParserInputPtr stream;
13246
13247 if (ioread == NULL)
13248 return (NULL);
13249
13250 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13251 XML_CHAR_ENCODING_NONE);
13252 if (input == NULL)
13253 return (NULL);
13254 ctxt = xmlNewParserCtxt();
13255 if (ctxt == NULL) {
13256 xmlFreeParserInputBuffer(input);
13257 return (NULL);
13258 }
13259 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13260 if (stream == NULL) {
13261 xmlFreeParserInputBuffer(input);
13262 xmlFreeParserCtxt(ctxt);
13263 return (NULL);
13264 }
13265 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013266 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013267}
13268
13269/**
13270 * xmlCtxtReadDoc:
13271 * @ctxt: an XML parser context
13272 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013273 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013274 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013275 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013276 *
13277 * parse an XML in-memory document and build a tree.
13278 * This reuses the existing @ctxt parser context
13279 *
13280 * Returns the resulting document tree
13281 */
13282xmlDocPtr
13283xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013284 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013285{
13286 xmlParserInputPtr stream;
13287
13288 if (cur == NULL)
13289 return (NULL);
13290 if (ctxt == NULL)
13291 return (NULL);
13292
13293 xmlCtxtReset(ctxt);
13294
13295 stream = xmlNewStringInputStream(ctxt, cur);
13296 if (stream == NULL) {
13297 return (NULL);
13298 }
13299 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013300 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013301}
13302
13303/**
13304 * xmlCtxtReadFile:
13305 * @ctxt: an XML parser context
13306 * @filename: a file or URL
13307 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013308 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013309 *
13310 * parse an XML file from the filesystem or the network.
13311 * This reuses the existing @ctxt parser context
13312 *
13313 * Returns the resulting document tree
13314 */
13315xmlDocPtr
13316xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13317 const char *encoding, int options)
13318{
13319 xmlParserInputPtr stream;
13320
13321 if (filename == NULL)
13322 return (NULL);
13323 if (ctxt == NULL)
13324 return (NULL);
13325
13326 xmlCtxtReset(ctxt);
13327
Daniel Veillard29614c72004-11-26 10:47:26 +000013328 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013329 if (stream == NULL) {
13330 return (NULL);
13331 }
13332 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013333 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013334}
13335
13336/**
13337 * xmlCtxtReadMemory:
13338 * @ctxt: an XML parser context
13339 * @buffer: a pointer to a char array
13340 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013341 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013342 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013343 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013344 *
13345 * parse an XML in-memory document and build a tree.
13346 * This reuses the existing @ctxt parser context
13347 *
13348 * Returns the resulting document tree
13349 */
13350xmlDocPtr
13351xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013352 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013353{
13354 xmlParserInputBufferPtr input;
13355 xmlParserInputPtr stream;
13356
13357 if (ctxt == NULL)
13358 return (NULL);
13359 if (buffer == NULL)
13360 return (NULL);
13361
13362 xmlCtxtReset(ctxt);
13363
13364 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13365 if (input == NULL) {
13366 return(NULL);
13367 }
13368
13369 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13370 if (stream == NULL) {
13371 xmlFreeParserInputBuffer(input);
13372 return(NULL);
13373 }
13374
13375 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013376 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013377}
13378
13379/**
13380 * xmlCtxtReadFd:
13381 * @ctxt: an XML parser context
13382 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013383 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013384 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013385 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013386 *
13387 * parse an XML from a file descriptor and build a tree.
13388 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013389 * NOTE that the file descriptor will not be closed when the
13390 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013391 *
13392 * Returns the resulting document tree
13393 */
13394xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013395xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13396 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013397{
13398 xmlParserInputBufferPtr input;
13399 xmlParserInputPtr stream;
13400
13401 if (fd < 0)
13402 return (NULL);
13403 if (ctxt == NULL)
13404 return (NULL);
13405
13406 xmlCtxtReset(ctxt);
13407
13408
13409 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13410 if (input == NULL)
13411 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013412 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013413 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13414 if (stream == NULL) {
13415 xmlFreeParserInputBuffer(input);
13416 return (NULL);
13417 }
13418 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013419 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013420}
13421
13422/**
13423 * xmlCtxtReadIO:
13424 * @ctxt: an XML parser context
13425 * @ioread: an I/O read function
13426 * @ioclose: an I/O close function
13427 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013428 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013429 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013430 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013431 *
13432 * parse an XML document from I/O functions and source and build a tree.
13433 * This reuses the existing @ctxt parser context
13434 *
13435 * Returns the resulting document tree
13436 */
13437xmlDocPtr
13438xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13439 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013440 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013441 const char *encoding, int options)
13442{
13443 xmlParserInputBufferPtr input;
13444 xmlParserInputPtr stream;
13445
13446 if (ioread == NULL)
13447 return (NULL);
13448 if (ctxt == NULL)
13449 return (NULL);
13450
13451 xmlCtxtReset(ctxt);
13452
13453 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13454 XML_CHAR_ENCODING_NONE);
13455 if (input == NULL)
13456 return (NULL);
13457 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13458 if (stream == NULL) {
13459 xmlFreeParserInputBuffer(input);
13460 return (NULL);
13461 }
13462 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013463 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013464}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013465
13466#define bottom_parser
13467#include "elfgcchack.h"