blob: a0194dc69523c637f1126fea576c6eaf8f558879 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents we accept to
 * process. The parser itself has no such limitation; the value only
 * exists as a safety boundary.
 */
unsigned int xmlParserMaxDepth = 1024;

/* Set to 1 in this file. */
#define SAX2 1

/* Sizes, in bytes presumably, of the parser working buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for the SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL terminated list of the XML prefixed PI targets allowed by
 * the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard37334572008-07-31 08:20:02 +0000119static int
120xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
121 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000122#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000123static void
124xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
125 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000126#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000127
Daniel Veillard7d515752003-09-26 19:12:37 +0000128static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000129xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
130 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000131
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000132static int
133xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
134
Daniel Veillarde57ec792003-09-10 10:50:59 +0000135/************************************************************************
136 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000137 * Some factorized error routines *
138 * *
139 ************************************************************************/
140
141/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000142 * xmlErrAttributeDup:
143 * @ctxt: an XML parser context
144 * @prefix: the attribute prefix
145 * @localname: the attribute localname
146 *
147 * Handle a redefinition of attribute error
148 */
149static void
150xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
151 const xmlChar * localname)
152{
Daniel Veillard157fee02003-10-31 10:36:03 +0000153 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
154 (ctxt->instate == XML_PARSER_EOF))
155 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000156 if (ctxt != NULL)
157 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000158 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000159 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000160 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
161 (const char *) localname, NULL, NULL, 0, 0,
162 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000163 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000164 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000165 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
166 (const char *) prefix, (const char *) localname,
167 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
168 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000169 if (ctxt != NULL) {
170 ctxt->wellFormed = 0;
171 if (ctxt->recovery == 0)
172 ctxt->disableSAX = 1;
173 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000174}
175
176/**
177 * xmlFatalErr:
178 * @ctxt: an XML parser context
179 * @error: the error number
180 * @extra: extra information string
181 *
182 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
183 */
184static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186{
187 const char *errmsg;
188
Daniel Veillard157fee02003-10-31 10:36:03 +0000189 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
190 (ctxt->instate == XML_PARSER_EOF))
191 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000192 switch (error) {
193 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid hexadecimal value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "CharRef: invalid decimal value\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "CharRef: invalid value\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "internal error";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference at end of document\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference in prolog\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference in epilog\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "PEReference: no name\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReference: expecting ';'\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "Detected an entity reference loop\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "PEReferences forbidden in internal subset\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "EntityValue: \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "AttValue: \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unescaped '<' not allowed in attributes values\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SystemLiteral \" or ' expected\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "Unfinished System or Public ID \" or ' expected\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Sequence ']]>' not allowed in content\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "PUBLIC, the Public Identifier is missing\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Comment must not contain '--' (double-hyphen)\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "xmlParsePI : no target name\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Invalid PI name\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "NOTATION: Name expected here\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'>' required to close NOTATION declaration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "Entity value required\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "Fragment not allowed";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "'(' required to start ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "NmToken expected in ATTLIST enumeration\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "')' required to finish ATTLIST enumeration\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg = "ContentDecl : Name or '(' expected\n";
291 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000292 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000293 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
294 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000295 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000296 errmsg =
297 "PEReference: forbidden within markup decl in internal subset\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "expected '>'\n";
301 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000303 errmsg = "XML conditional section '[' expected\n";
304 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000306 errmsg = "Content error in the external subset\n";
307 break;
308 case XML_ERR_CONDSEC_INVALID_KEYWORD:
309 errmsg =
310 "conditional section INCLUDE or IGNORE keyword expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "XML conditional section not closed\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Text declaration '<?xml' required\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "parsing XML declaration: '?>' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "external parsed entities cannot be standalone\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityRef: expecting ';'\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "DOCTYPE improperly terminated\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "EndTag: '</' not found\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "expected '='\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "String not closed expecting \" or '\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "String not started expecting ' or \"\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Invalid XML encoding name\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "standalone accepts only 'yes' or 'no'\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Document is empty\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Extra content at the end of the document\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "chunk is not well balanced\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "extra content at the end of well balanced chunk\n";
359 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000360 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "Malformed declaration expecting version\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 case:
365 errmsg = "\n";
366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 default:
369 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000371 if (ctxt != NULL)
372 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000373 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
375 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000376 if (ctxt != NULL) {
377 ctxt->wellFormed = 0;
378 if (ctxt->recovery == 0)
379 ctxt->disableSAX = 1;
380 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381}
382
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000383/**
384 * xmlFatalErrMsg:
385 * @ctxt: an XML parser context
386 * @error: the error number
387 * @msg: the error message
388 *
389 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
390 */
391static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
393 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000394{
Daniel Veillard157fee02003-10-31 10:36:03 +0000395 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396 (ctxt->instate == XML_PARSER_EOF))
397 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000398 if (ctxt != NULL)
399 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000400 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000402 if (ctxt != NULL) {
403 ctxt->wellFormed = 0;
404 if (ctxt->recovery == 0)
405 ctxt->disableSAX = 1;
406 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000407}
408
409/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000410 * xmlWarningMsg:
411 * @ctxt: an XML parser context
412 * @error: the error number
413 * @msg: the error message
414 * @str1: extra data
415 * @str2: extra data
416 *
417 * Handle a warning.
418 */
419static void
420xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
421 const char *msg, const xmlChar *str1, const xmlChar *str2)
422{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000423 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000424
Daniel Veillard157fee02003-10-31 10:36:03 +0000425 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
426 (ctxt->instate == XML_PARSER_EOF))
427 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000428 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
429 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000430 schannel = ctxt->sax->serror;
431 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000432 (ctxt->sax) ? ctxt->sax->warning : NULL,
433 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000434 ctxt, NULL, XML_FROM_PARSER, error,
435 XML_ERR_WARNING, NULL, 0,
436 (const char *) str1, (const char *) str2, NULL, 0, 0,
437 msg, (const char *) str1, (const char *) str2);
438}
439
440/**
441 * xmlValidityError:
442 * @ctxt: an XML parser context
443 * @error: the error number
444 * @msg: the error message
445 * @str1: extra data
446 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000447 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000448 */
449static void
450xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
451 const char *msg, const xmlChar *str1)
452{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000453 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000454
455 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
456 (ctxt->instate == XML_PARSER_EOF))
457 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000458 if (ctxt != NULL) {
459 ctxt->errNo = error;
460 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
461 schannel = ctxt->sax->serror;
462 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000463 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000464 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000465 ctxt, NULL, XML_FROM_DTD, error,
466 XML_ERR_ERROR, NULL, 0, (const char *) str1,
467 NULL, NULL, 0, 0,
468 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000469 if (ctxt != NULL) {
470 ctxt->valid = 0;
471 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000472}
473
474/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000475 * xmlFatalErrMsgInt:
476 * @ctxt: an XML parser context
477 * @error: the error number
478 * @msg: the error message
479 * @val: an integer value
480 *
481 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
482 */
483static void
484xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000486{
Daniel Veillard157fee02003-10-31 10:36:03 +0000487 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
488 (ctxt->instate == XML_PARSER_EOF))
489 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000490 if (ctxt != NULL)
491 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000492 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000493 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
494 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000495 if (ctxt != NULL) {
496 ctxt->wellFormed = 0;
497 if (ctxt->recovery == 0)
498 ctxt->disableSAX = 1;
499 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000500}
501
502/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000503 * xmlFatalErrMsgStrIntStr:
504 * @ctxt: an XML parser context
505 * @error: the error number
506 * @msg: the error message
507 * @str1: an string info
508 * @val: an integer value
509 * @str2: an string info
510 *
511 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
512 */
513static void
514xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
515 const char *msg, const xmlChar *str1, int val,
516 const xmlChar *str2)
517{
Daniel Veillard157fee02003-10-31 10:36:03 +0000518 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
519 (ctxt->instate == XML_PARSER_EOF))
520 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL)
522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000524 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
525 NULL, 0, (const char *) str1, (const char *) str2,
526 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000527 if (ctxt != NULL) {
528 ctxt->wellFormed = 0;
529 if (ctxt->recovery == 0)
530 ctxt->disableSAX = 1;
531 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000532}
533
534/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000535 * xmlFatalErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
542 */
543static void
544xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL)
551 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000553 XML_FROM_PARSER, error, XML_ERR_FATAL,
554 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
555 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000556 if (ctxt != NULL) {
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
560 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000561}
562
563/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000564 * xmlErrMsgStr:
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
568 * @val: a string value
569 *
570 * Handle a non fatal parser error
571 */
572static void
573xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
574 const char *msg, const xmlChar * val)
575{
Daniel Veillard157fee02003-10-31 10:36:03 +0000576 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
577 (ctxt->instate == XML_PARSER_EOF))
578 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000579 if (ctxt != NULL)
580 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000582 XML_FROM_PARSER, error, XML_ERR_ERROR,
583 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
584 val);
585}
586
587/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000588 * xmlNsErr:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the message
592 * @info1: extra information string
593 * @info2: extra information string
594 *
595 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
596 */
597static void
598xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
599 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000600 const xmlChar * info1, const xmlChar * info2,
601 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000602{
Daniel Veillard157fee02003-10-31 10:36:03 +0000603 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
604 (ctxt->instate == XML_PARSER_EOF))
605 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000608 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000609 XML_ERR_ERROR, NULL, 0, (const char *) info1,
610 (const char *) info2, (const char *) info3, 0, 0, msg,
611 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000612 if (ctxt != NULL)
613 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000614}
615
Daniel Veillard37334572008-07-31 08:20:02 +0000616/**
617 * xmlNsWarn
618 * @ctxt: an XML parser context
619 * @error: the error number
620 * @msg: the message
621 * @info1: extra information string
622 * @info2: extra information string
623 *
624 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
625 */
626static void
627xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
628 const char *msg,
629 const xmlChar * info1, const xmlChar * info2,
630 const xmlChar * info3)
631{
632 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
633 (ctxt->instate == XML_PARSER_EOF))
634 return;
635 if (ctxt != NULL)
636 ctxt->errNo = error;
637 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
638 XML_ERR_WARNING, NULL, 0, (const char *) info1,
639 (const char *) info2, (const char *) info3, 0, 0, msg,
640 info1, info2, info3);
641}
642
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000643/************************************************************************
644 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000645 * Library wide options *
646 * *
647 ************************************************************************/
648
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
 */
659int
660xmlHasFeature(xmlFeature feature)
661{
662 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000663 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000664#ifdef LIBXML_THREAD_ENABLED
665 return(1);
666#else
667 return(0);
668#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000669 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000670#ifdef LIBXML_TREE_ENABLED
671 return(1);
672#else
673 return(0);
674#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000675 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000676#ifdef LIBXML_OUTPUT_ENABLED
677 return(1);
678#else
679 return(0);
680#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000681 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000682#ifdef LIBXML_PUSH_ENABLED
683 return(1);
684#else
685 return(0);
686#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000687 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000688#ifdef LIBXML_READER_ENABLED
689 return(1);
690#else
691 return(0);
692#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000693 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000694#ifdef LIBXML_PATTERN_ENABLED
695 return(1);
696#else
697 return(0);
698#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000699 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000700#ifdef LIBXML_WRITER_ENABLED
701 return(1);
702#else
703 return(0);
704#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000705 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000706#ifdef LIBXML_SAX1_ENABLED
707 return(1);
708#else
709 return(0);
710#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000711 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000712#ifdef LIBXML_FTP_ENABLED
713 return(1);
714#else
715 return(0);
716#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000717 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000718#ifdef LIBXML_HTTP_ENABLED
719 return(1);
720#else
721 return(0);
722#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000723 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000724#ifdef LIBXML_VALID_ENABLED
725 return(1);
726#else
727 return(0);
728#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000729 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000730#ifdef LIBXML_HTML_ENABLED
731 return(1);
732#else
733 return(0);
734#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000735 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000736#ifdef LIBXML_LEGACY_ENABLED
737 return(1);
738#else
739 return(0);
740#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000741 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000742#ifdef LIBXML_C14N_ENABLED
743 return(1);
744#else
745 return(0);
746#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000747 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000748#ifdef LIBXML_CATALOG_ENABLED
749 return(1);
750#else
751 return(0);
752#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000753 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000754#ifdef LIBXML_XPATH_ENABLED
755 return(1);
756#else
757 return(0);
758#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000759 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000760#ifdef LIBXML_XPTR_ENABLED
761 return(1);
762#else
763 return(0);
764#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000765 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000766#ifdef LIBXML_XINCLUDE_ENABLED
767 return(1);
768#else
769 return(0);
770#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000771 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000772#ifdef LIBXML_ICONV_ENABLED
773 return(1);
774#else
775 return(0);
776#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000777 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000778#ifdef LIBXML_ISO8859X_ENABLED
779 return(1);
780#else
781 return(0);
782#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000783 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000784#ifdef LIBXML_UNICODE_ENABLED
785 return(1);
786#else
787 return(0);
788#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000789 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000790#ifdef LIBXML_REGEXP_ENABLED
791 return(1);
792#else
793 return(0);
794#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000795 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000796#ifdef LIBXML_AUTOMATA_ENABLED
797 return(1);
798#else
799 return(0);
800#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000801 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000802#ifdef LIBXML_EXPR_ENABLED
803 return(1);
804#else
805 return(0);
806#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000807 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000808#ifdef LIBXML_SCHEMAS_ENABLED
809 return(1);
810#else
811 return(0);
812#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000813 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000814#ifdef LIBXML_SCHEMATRON_ENABLED
815 return(1);
816#else
817 return(0);
818#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000819 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000820#ifdef LIBXML_MODULES_ENABLED
821 return(1);
822#else
823 return(0);
824#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000825 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000826#ifdef LIBXML_DEBUG_ENABLED
827 return(1);
828#else
829 return(0);
830#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000831 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832#ifdef DEBUG_MEMORY_LOCATION
833 return(1);
834#else
835 return(0);
836#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000837 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838#ifdef LIBXML_DEBUG_RUNTIME
839 return(1);
840#else
841 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000842#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000843 case XML_WITH_ZLIB:
844#ifdef LIBXML_ZLIB_ENABLED
845 return(1);
846#else
847 return(0);
848#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849 default:
850 break;
851 }
852 return(0);
853}
854
855/************************************************************************
856 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000857 * SAX2 defaulted attributes handling *
858 * *
859 ************************************************************************/
860
861/**
862 * xmlDetectSAX2:
863 * @ctxt: an XML parser context
864 *
865 * Do the SAX2 detection and specific intialization
866 */
867static void
868xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
869 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000870#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000871 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
872 ((ctxt->sax->startElementNs != NULL) ||
873 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000874#else
875 ctxt->sax2 = 1;
876#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877
878 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
879 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
880 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000881 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
882 (ctxt->str_xml_ns == NULL)) {
883 xmlErrMemory(ctxt, NULL);
884 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000885}
886
Daniel Veillarde57ec792003-09-10 10:50:59 +0000887typedef struct _xmlDefAttrs xmlDefAttrs;
888typedef xmlDefAttrs *xmlDefAttrsPtr;
889struct _xmlDefAttrs {
890 int nbAttrs; /* number of defaulted attributes on that element */
891 int maxAttrs; /* the size of the array */
892 const xmlChar *values[4]; /* array of localname/prefix/values */
893};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894
895/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000896 * xmlAttrNormalizeSpace:
897 * @src: the source string
898 * @dst: the target string
899 *
900 * Normalize the space in non CDATA attribute values:
901 * If the attribute type is not CDATA, then the XML processor MUST further
902 * process the normalized attribute value by discarding any leading and
903 * trailing space (#x20) characters, and by replacing sequences of space
904 * (#x20) characters by a single space (#x20) character.
905 * Note that the size of dst need to be at least src, and if one doesn't need
906 * to preserve dst (and it doesn't come from a dictionary or read-only) then
907 * passing src as dst is just fine.
908 *
909 * Returns a pointer to the normalized value (dst) or NULL if no conversion
910 * is needed.
911 */
912static xmlChar *
913xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
914{
915 if ((src == NULL) || (dst == NULL))
916 return(NULL);
917
918 while (*src == 0x20) src++;
919 while (*src != 0) {
920 if (*src == 0x20) {
921 while (*src == 0x20) src++;
922 if (*src != 0)
923 *dst++ = 0x20;
924 } else {
925 *dst++ = *src++;
926 }
927 }
928 *dst = 0;
929 if (dst == src)
930 return(NULL);
931 return(dst);
932}
933
934/**
935 * xmlAttrNormalizeSpace2:
936 * @src: the source string
937 *
938 * Normalize the space in non CDATA attribute values, a slightly more complex
939 * front end to avoid allocation problems when running on attribute values
940 * coming from the input.
941 *
942 * Returns a pointer to the normalized value (dst) or NULL if no conversion
943 * is needed.
944 */
945static const xmlChar *
946xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, const xmlChar *src, int *len)
947{
948 int i;
949 int remove_head = 0;
950 int need_realloc = 0;
951 const xmlChar *cur;
952
953 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
954 return(NULL);
955 i = *len;
956 if (i <= 0)
957 return(NULL);
958
959 cur = src;
960 while (*cur == 0x20) {
961 cur++;
962 remove_head++;
963 }
964 while (*cur != 0) {
965 if (*cur == 0x20) {
966 cur++;
967 if ((*cur == 0x20) || (*cur == 0)) {
968 need_realloc = 1;
969 break;
970 }
971 } else
972 cur++;
973 }
974 if (need_realloc) {
975 xmlChar *ret;
976
977 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
978 if (ret == NULL) {
979 xmlErrMemory(ctxt, NULL);
980 return(NULL);
981 }
982 xmlAttrNormalizeSpace(ret, ret);
983 *len = (int) strlen((const char *)ret);
984 return(ret);
985 } else if (remove_head) {
986 *len -= remove_head;
987 return(src + remove_head);
988 }
989 return(NULL);
990}
991
992/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000993 * xmlAddDefAttrs:
994 * @ctxt: an XML parser context
995 * @fullname: the element fullname
996 * @fullattr: the attribute fullname
997 * @value: the attribute value
998 *
999 * Add a defaulted attribute for an element
1000 */
1001static void
1002xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1003 const xmlChar *fullname,
1004 const xmlChar *fullattr,
1005 const xmlChar *value) {
1006 xmlDefAttrsPtr defaults;
1007 int len;
1008 const xmlChar *name;
1009 const xmlChar *prefix;
1010
Daniel Veillard6a31b832008-03-26 14:06:44 +00001011 /*
1012 * Allows to detect attribute redefinitions
1013 */
1014 if (ctxt->attsSpecial != NULL) {
1015 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1016 return;
1017 }
1018
Daniel Veillarde57ec792003-09-10 10:50:59 +00001019 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001020 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 if (ctxt->attsDefault == NULL)
1022 goto mem_error;
1023 }
1024
1025 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001026 * split the element name into prefix:localname , the string found
1027 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001028 */
1029 name = xmlSplitQName3(fullname, &len);
1030 if (name == NULL) {
1031 name = xmlDictLookup(ctxt->dict, fullname, -1);
1032 prefix = NULL;
1033 } else {
1034 name = xmlDictLookup(ctxt->dict, name, -1);
1035 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1036 }
1037
1038 /*
1039 * make sure there is some storage
1040 */
1041 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1042 if (defaults == NULL) {
1043 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +00001044 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001045 if (defaults == NULL)
1046 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001047 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001048 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001049 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1050 defaults, NULL) < 0) {
1051 xmlFree(defaults);
1052 goto mem_error;
1053 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001054 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001055 xmlDefAttrsPtr temp;
1056
1057 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +00001058 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001059 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001060 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001061 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001063 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1064 defaults, NULL) < 0) {
1065 xmlFree(defaults);
1066 goto mem_error;
1067 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001068 }
1069
1070 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001071 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001072 * are within the DTD and hen not associated to namespace names.
1073 */
1074 name = xmlSplitQName3(fullattr, &len);
1075 if (name == NULL) {
1076 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1077 prefix = NULL;
1078 } else {
1079 name = xmlDictLookup(ctxt->dict, name, -1);
1080 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1081 }
1082
1083 defaults->values[4 * defaults->nbAttrs] = name;
1084 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
1085 /* intern the string and precompute the end */
1086 len = xmlStrlen(value);
1087 value = xmlDictLookup(ctxt->dict, value, len);
1088 defaults->values[4 * defaults->nbAttrs + 2] = value;
1089 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
1090 defaults->nbAttrs++;
1091
1092 return;
1093
1094mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001095 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 return;
1097}
1098
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001099/**
1100 * xmlAddSpecialAttr:
1101 * @ctxt: an XML parser context
1102 * @fullname: the element fullname
1103 * @fullattr: the attribute fullname
1104 * @type: the attribute type
1105 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001106 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001107 */
1108static void
1109xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1110 const xmlChar *fullname,
1111 const xmlChar *fullattr,
1112 int type)
1113{
1114 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001115 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001116 if (ctxt->attsSpecial == NULL)
1117 goto mem_error;
1118 }
1119
Daniel Veillardac4118d2008-01-11 05:27:32 +00001120 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1121 return;
1122
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001123 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1124 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001125 return;
1126
1127mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001128 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001129 return;
1130}
1131
Daniel Veillard4432df22003-09-28 18:58:27 +00001132/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001133 * xmlCleanSpecialAttrCallback:
1134 *
1135 * Removes CDATA attributes from the special attribute table
1136 */
1137static void
1138xmlCleanSpecialAttrCallback(void *payload, void *data,
1139 const xmlChar *fullname, const xmlChar *fullattr,
1140 const xmlChar *unused ATTRIBUTE_UNUSED) {
1141 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1142
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001143 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001144 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1145 }
1146}
1147
1148/**
1149 * xmlCleanSpecialAttr:
1150 * @ctxt: an XML parser context
1151 *
1152 * Trim the list of attributes defined to remove all those of type
1153 * CDATA as they are not special. This call should be done when finishing
1154 * to parse the DTD and before starting to parse the document root.
1155 */
1156static void
1157xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1158{
1159 if (ctxt->attsSpecial == NULL)
1160 return;
1161
1162 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1163
1164 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1165 xmlHashFree(ctxt->attsSpecial, NULL);
1166 ctxt->attsSpecial = NULL;
1167 }
1168 return;
1169}
1170
1171/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001172 * xmlCheckLanguageID:
1173 * @lang: pointer to the string value
1174 *
1175 * Checks that the value conforms to the LanguageID production:
1176 *
1177 * NOTE: this is somewhat deprecated, those productions were removed from
1178 * the XML Second edition.
1179 *
1180 * [33] LanguageID ::= Langcode ('-' Subcode)*
1181 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1182 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1183 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1184 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1185 * [38] Subcode ::= ([a-z] | [A-Z])+
1186 *
1187 * Returns 1 if correct 0 otherwise
1188 **/
1189int
1190xmlCheckLanguageID(const xmlChar * lang)
1191{
1192 const xmlChar *cur = lang;
1193
1194 if (cur == NULL)
1195 return (0);
1196 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1197 ((cur[0] == 'I') && (cur[1] == '-'))) {
1198 /*
1199 * IANA code
1200 */
1201 cur += 2;
1202 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1203 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1204 cur++;
1205 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1206 ((cur[0] == 'X') && (cur[1] == '-'))) {
1207 /*
1208 * User code
1209 */
1210 cur += 2;
1211 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1212 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1213 cur++;
1214 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1215 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1216 /*
1217 * ISO639
1218 */
1219 cur++;
1220 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1221 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1222 cur++;
1223 else
1224 return (0);
1225 } else
1226 return (0);
1227 while (cur[0] != 0) { /* non input consuming */
1228 if (cur[0] != '-')
1229 return (0);
1230 cur++;
1231 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1232 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1233 cur++;
1234 else
1235 return (0);
1236 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1237 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1238 cur++;
1239 }
1240 return (1);
1241}
1242
Owen Taylor3473f882001-02-23 17:55:21 +00001243/************************************************************************
1244 * *
1245 * Parser stacks related functions and macros *
1246 * *
1247 ************************************************************************/
1248
1249xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1250 const xmlChar ** str);
1251
Daniel Veillard0fb18932003-09-07 09:14:37 +00001252#ifdef SAX2
1253/**
1254 * nsPush:
1255 * @ctxt: an XML parser context
1256 * @prefix: the namespace prefix or NULL
1257 * @URL: the namespace name
1258 *
1259 * Pushes a new parser namespace on top of the ns stack
1260 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001261 * Returns -1 in case of error, -2 if the namespace should be discarded
1262 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001263 */
1264static int
1265nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1266{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001267 if (ctxt->options & XML_PARSE_NSCLEAN) {
1268 int i;
1269 for (i = 0;i < ctxt->nsNr;i += 2) {
1270 if (ctxt->nsTab[i] == prefix) {
1271 /* in scope */
1272 if (ctxt->nsTab[i + 1] == URL)
1273 return(-2);
1274 /* out of scope keep it */
1275 break;
1276 }
1277 }
1278 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001279 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1280 ctxt->nsMax = 10;
1281 ctxt->nsNr = 0;
1282 ctxt->nsTab = (const xmlChar **)
1283 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1284 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001285 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001286 ctxt->nsMax = 0;
1287 return (-1);
1288 }
1289 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001290 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001291 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001292 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1293 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1294 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001295 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001296 ctxt->nsMax /= 2;
1297 return (-1);
1298 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001299 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001300 }
1301 ctxt->nsTab[ctxt->nsNr++] = prefix;
1302 ctxt->nsTab[ctxt->nsNr++] = URL;
1303 return (ctxt->nsNr);
1304}
1305/**
1306 * nsPop:
1307 * @ctxt: an XML parser context
1308 * @nr: the number to pop
1309 *
1310 * Pops the top @nr parser prefix/namespace from the ns stack
1311 *
1312 * Returns the number of namespaces removed
1313 */
1314static int
1315nsPop(xmlParserCtxtPtr ctxt, int nr)
1316{
1317 int i;
1318
1319 if (ctxt->nsTab == NULL) return(0);
1320 if (ctxt->nsNr < nr) {
1321 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1322 nr = ctxt->nsNr;
1323 }
1324 if (ctxt->nsNr <= 0)
1325 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001326
Daniel Veillard0fb18932003-09-07 09:14:37 +00001327 for (i = 0;i < nr;i++) {
1328 ctxt->nsNr--;
1329 ctxt->nsTab[ctxt->nsNr] = NULL;
1330 }
1331 return(nr);
1332}
1333#endif
1334
1335static int
1336xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1337 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001339 int maxatts;
1340
1341 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001342 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001343 atts = (const xmlChar **)
1344 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001345 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001346 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001347 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1348 if (attallocs == NULL) goto mem_error;
1349 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001350 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001351 } else if (nr + 5 > ctxt->maxatts) {
1352 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001353 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1354 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001355 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001356 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001357 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1358 (maxatts / 5) * sizeof(int));
1359 if (attallocs == NULL) goto mem_error;
1360 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 ctxt->maxatts = maxatts;
1362 }
1363 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001364mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001365 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001366 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001367}
1368
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001369/**
1370 * inputPush:
1371 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001372 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001373 *
1374 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001375 *
1376 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001377 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001378int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1380{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001381 if ((ctxt == NULL) || (value == NULL))
1382 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 if (ctxt->inputNr >= ctxt->inputMax) {
1384 ctxt->inputMax *= 2;
1385 ctxt->inputTab =
1386 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1387 ctxt->inputMax *
1388 sizeof(ctxt->inputTab[0]));
1389 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001390 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001391 return (0);
1392 }
1393 }
1394 ctxt->inputTab[ctxt->inputNr] = value;
1395 ctxt->input = value;
1396 return (ctxt->inputNr++);
1397}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001398/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001399 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001400 * @ctxt: an XML parser context
1401 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001402 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001403 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001404 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001405 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001406xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001407inputPop(xmlParserCtxtPtr ctxt)
1408{
1409 xmlParserInputPtr ret;
1410
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001411 if (ctxt == NULL)
1412 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001414 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001415 ctxt->inputNr--;
1416 if (ctxt->inputNr > 0)
1417 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1418 else
1419 ctxt->input = NULL;
1420 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001421 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001422 return (ret);
1423}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001424/**
1425 * nodePush:
1426 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001427 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001428 *
1429 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001430 *
1431 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001432 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001433int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001434nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1435{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001436 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001437 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001438 xmlNodePtr *tmp;
1439
1440 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1441 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001442 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001443 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001444 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001445 return (0);
1446 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001447 ctxt->nodeTab = tmp;
1448 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001449 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001450 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001451 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001452 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1453 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001454 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001455 return(0);
1456 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001457 ctxt->nodeTab[ctxt->nodeNr] = value;
1458 ctxt->node = value;
1459 return (ctxt->nodeNr++);
1460}
1461/**
1462 * nodePop:
1463 * @ctxt: an XML parser context
1464 *
1465 * Pops the top element node from the node stack
1466 *
1467 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001468 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001469xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001470nodePop(xmlParserCtxtPtr ctxt)
1471{
1472 xmlNodePtr ret;
1473
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001474 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001475 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001476 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001477 ctxt->nodeNr--;
1478 if (ctxt->nodeNr > 0)
1479 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1480 else
1481 ctxt->node = NULL;
1482 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001483 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001484 return (ret);
1485}
Daniel Veillarda2351322004-06-27 12:08:10 +00001486
1487#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001488/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001489 * nameNsPush:
1490 * @ctxt: an XML parser context
1491 * @value: the element name
1492 * @prefix: the element prefix
1493 * @URI: the element namespace name
1494 *
1495 * Pushes a new element name/prefix/URL on top of the name stack
1496 *
1497 * Returns -1 in case of error, the index in the stack otherwise
1498 */
1499static int
1500nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1501 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1502{
1503 if (ctxt->nameNr >= ctxt->nameMax) {
1504 const xmlChar * *tmp;
1505 void **tmp2;
1506 ctxt->nameMax *= 2;
1507 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1508 ctxt->nameMax *
1509 sizeof(ctxt->nameTab[0]));
1510 if (tmp == NULL) {
1511 ctxt->nameMax /= 2;
1512 goto mem_error;
1513 }
1514 ctxt->nameTab = tmp;
1515 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1516 ctxt->nameMax * 3 *
1517 sizeof(ctxt->pushTab[0]));
1518 if (tmp2 == NULL) {
1519 ctxt->nameMax /= 2;
1520 goto mem_error;
1521 }
1522 ctxt->pushTab = tmp2;
1523 }
1524 ctxt->nameTab[ctxt->nameNr] = value;
1525 ctxt->name = value;
1526 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1527 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001528 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001529 return (ctxt->nameNr++);
1530mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001531 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001532 return (-1);
1533}
1534/**
1535 * nameNsPop:
1536 * @ctxt: an XML parser context
1537 *
1538 * Pops the top element/prefix/URI name from the name stack
1539 *
1540 * Returns the name just removed
1541 */
1542static const xmlChar *
1543nameNsPop(xmlParserCtxtPtr ctxt)
1544{
1545 const xmlChar *ret;
1546
1547 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001548 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001549 ctxt->nameNr--;
1550 if (ctxt->nameNr > 0)
1551 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1552 else
1553 ctxt->name = NULL;
1554 ret = ctxt->nameTab[ctxt->nameNr];
1555 ctxt->nameTab[ctxt->nameNr] = NULL;
1556 return (ret);
1557}
Daniel Veillarda2351322004-06-27 12:08:10 +00001558#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001559
1560/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001561 * namePush:
1562 * @ctxt: an XML parser context
1563 * @value: the element name
1564 *
1565 * Pushes a new element name on top of the name stack
1566 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001567 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001568 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001569int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001570namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001571{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001572 if (ctxt == NULL) return (-1);
1573
Daniel Veillard1c732d22002-11-30 11:22:59 +00001574 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001575 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001576 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001577 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 ctxt->nameMax *
1579 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001580 if (tmp == NULL) {
1581 ctxt->nameMax /= 2;
1582 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001583 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001584 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 }
1586 ctxt->nameTab[ctxt->nameNr] = value;
1587 ctxt->name = value;
1588 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001589mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001590 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001591 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001592}
1593/**
1594 * namePop:
1595 * @ctxt: an XML parser context
1596 *
1597 * Pops the top element name from the name stack
1598 *
1599 * Returns the name just removed
1600 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001601const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001602namePop(xmlParserCtxtPtr ctxt)
1603{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001604 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001605
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001606 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1607 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001608 ctxt->nameNr--;
1609 if (ctxt->nameNr > 0)
1610 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1611 else
1612 ctxt->name = NULL;
1613 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001614 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001615 return (ret);
1616}
Owen Taylor3473f882001-02-23 17:55:21 +00001617
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001618static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001619 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001620 int *tmp;
1621
Owen Taylor3473f882001-02-23 17:55:21 +00001622 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001623 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1624 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1625 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001626 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001627 return(0);
1628 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001629 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001630 }
1631 ctxt->spaceTab[ctxt->spaceNr] = val;
1632 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1633 return(ctxt->spaceNr++);
1634}
1635
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001636static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001637 int ret;
1638 if (ctxt->spaceNr <= 0) return(0);
1639 ctxt->spaceNr--;
1640 if (ctxt->spaceNr > 0)
1641 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1642 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001643 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001644 ret = ctxt->spaceTab[ctxt->spaceNr];
1645 ctxt->spaceTab[ctxt->spaceNr] = -1;
1646 return(ret);
1647}
1648
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

1683
/*
 * Direct accessors on the current input; all of them expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1..cn.
 * Bytes are compared left to right and evaluation stops at the first
 * mismatch. Every argument is parenthesized so that an expression such
 * as "ptr + 2" can be passed safely; s is evaluated once per compared
 * byte, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

1706
/*
 * SKIP(val): advance the cursor, the column and the consumed-char counter
 * by val, then expand a parameter entity reference if the cursor now sits
 * on '%', and pop the input if it is exhausted and cannot be grown.
 * Line numbers are not updated, so the skipped text must not contain
 * newlines. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val skipped chars one at a time
 * so that line and column stay accurate across newlines. val is
 * parenthesized in the loop test so that any expression can be passed;
 * it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1729
Daniel Veillarda880b122003-04-21 21:36:41 +00001730#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001731 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1732 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001733 xmlSHRINK (ctxt);
1734
1735static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1736 xmlParserInputShrink(ctxt->input);
1737 if ((*ctxt->input->cur == 0) &&
1738 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1739 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001740 }
Owen Taylor3473f882001-02-23 17:55:21 +00001741
Daniel Veillarda880b122003-04-21 21:36:41 +00001742#define GROW if ((ctxt->progressive == 0) && \
1743 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001744 xmlGROW (ctxt);
1745
1746static void xmlGROW (xmlParserCtxtPtr ctxt) {
1747 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1748 if ((*ctxt->input->cur == 0) &&
1749 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1750 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001751}
Owen Taylor3473f882001-02-23 17:55:21 +00001752
/* Skip blanks / advance one character through the full decoding path. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping column and char counter,
 * and try to read more data if the cursor lands on a zero byte.
 * The line counter is not touched.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l bytes long, keeping
 * line/column up to date, then expand a parameter entity reference if
 * the cursor now sits on '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character; l receives its length in bytes. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; single byte characters are stored directly, others go
 * through xmlCopyCharMultiByte(). Expands to a bare if/else statement
 * without trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001779
1780/**
1781 * xmlSkipBlankChars:
1782 * @ctxt: the XML parser context
1783 *
1784 * skip all blanks character found at that point in the input streams.
1785 * It pops up finished entities in the process if allowable at that point.
1786 *
1787 * Returns the number of space chars skipped
1788 */
1789
1790int
1791xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001792 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001793
1794 /*
1795 * It's Okay to use CUR/NEXT here since all the blanks are on
1796 * the ASCII range.
1797 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001798 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1799 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001800 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001801 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001802 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001803 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001804 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001805 if (*cur == '\n') {
1806 ctxt->input->line++; ctxt->input->col = 1;
1807 }
1808 cur++;
1809 res++;
1810 if (*cur == 0) {
1811 ctxt->input->cur = cur;
1812 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1813 cur = ctxt->input->cur;
1814 }
1815 }
1816 ctxt->input->cur = cur;
1817 } else {
1818 int cur;
1819 do {
1820 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001821 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001822 NEXT;
1823 cur = CUR;
1824 res++;
1825 }
1826 while ((cur == 0) && (ctxt->inputNr > 1) &&
1827 (ctxt->instate != XML_PARSER_COMMENT)) {
1828 xmlPopInput(ctxt);
1829 cur = CUR;
1830 }
1831 /*
1832 * Need to handle support of entities branching here
1833 */
1834 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1835 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1836 }
Owen Taylor3473f882001-02-23 17:55:21 +00001837 return(res);
1838}
1839
1840/************************************************************************
1841 * *
1842 * Commodity functions to handle entities *
1843 * *
1844 ************************************************************************/
1845
1846/**
1847 * xmlPopInput:
1848 * @ctxt: an XML parser context
1849 *
1850 * xmlPopInput: the current input pointed by ctxt->input came to an end
1851 * pop it and return the next char.
1852 *
1853 * Returns the current xmlChar in the parser context
1854 */
1855xmlChar
1856xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001857 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001858 if (xmlParserDebugEntities)
1859 xmlGenericError(xmlGenericErrorContext,
1860 "Popping input %d\n", ctxt->inputNr);
1861 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001862 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001863 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1864 return(xmlPopInput(ctxt));
1865 return(CUR);
1866}
1867
1868/**
1869 * xmlPushInput:
1870 * @ctxt: an XML parser context
1871 * @input: an XML parser input fragment (entity, XML fragment ...).
1872 *
1873 * xmlPushInput: switch to a new input stream which is stacked on top
1874 * of the previous one(s).
1875 */
1876void
1877xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1878 if (input == NULL) return;
1879
1880 if (xmlParserDebugEntities) {
1881 if ((ctxt->input != NULL) && (ctxt->input->filename))
1882 xmlGenericError(xmlGenericErrorContext,
1883 "%s(%d): ", ctxt->input->filename,
1884 ctxt->input->line);
1885 xmlGenericError(xmlGenericErrorContext,
1886 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1887 }
1888 inputPush(ctxt, input);
1889 GROW;
1890}
1891
1892/**
1893 * xmlParseCharRef:
1894 * @ctxt: an XML parser context
1895 *
1896 * parse Reference declarations
1897 *
1898 * [66] CharRef ::= '&#' [0-9]+ ';' |
1899 * '&#x' [0-9a-fA-F]+ ';'
1900 *
1901 * [ WFC: Legal Character ]
1902 * Characters referred to using character references must match the
1903 * production for Char.
1904 *
1905 * Returns the value parsed (as an int), 0 in case of error
1906 */
1907int
1908xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001909 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001910 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001911 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001912
Owen Taylor3473f882001-02-23 17:55:21 +00001913 /*
1914 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1915 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001916 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001917 (NXT(2) == 'x')) {
1918 SKIP(3);
1919 GROW;
1920 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001921 if (count++ > 20) {
1922 count = 0;
1923 GROW;
1924 }
1925 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001926 val = val * 16 + (CUR - '0');
1927 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1928 val = val * 16 + (CUR - 'a') + 10;
1929 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1930 val = val * 16 + (CUR - 'A') + 10;
1931 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001932 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001933 val = 0;
1934 break;
1935 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001936 if (val > 0x10FFFF)
1937 outofrange = val;
1938
Owen Taylor3473f882001-02-23 17:55:21 +00001939 NEXT;
1940 count++;
1941 }
1942 if (RAW == ';') {
1943 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001944 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001945 ctxt->nbChars ++;
1946 ctxt->input->cur++;
1947 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001948 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001949 SKIP(2);
1950 GROW;
1951 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001952 if (count++ > 20) {
1953 count = 0;
1954 GROW;
1955 }
1956 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001957 val = val * 10 + (CUR - '0');
1958 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001959 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001960 val = 0;
1961 break;
1962 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001963 if (val > 0x10FFFF)
1964 outofrange = val;
1965
Owen Taylor3473f882001-02-23 17:55:21 +00001966 NEXT;
1967 count++;
1968 }
1969 if (RAW == ';') {
1970 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001971 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001972 ctxt->nbChars ++;
1973 ctxt->input->cur++;
1974 }
1975 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 }
1978
1979 /*
1980 * [ WFC: Legal Character ]
1981 * Characters referred to using character references must match the
1982 * production for Char.
1983 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001984 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001985 return(val);
1986 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001987 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1988 "xmlParseCharRef: invalid xmlChar value %d\n",
1989 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001990 }
1991 return(0);
1992}
1993
1994/**
1995 * xmlParseStringCharRef:
1996 * @ctxt: an XML parser context
1997 * @str: a pointer to an index in the string
1998 *
1999 * parse Reference declarations, variant parsing from a string rather
2000 * than an an input flow.
2001 *
2002 * [66] CharRef ::= '&#' [0-9]+ ';' |
2003 * '&#x' [0-9a-fA-F]+ ';'
2004 *
2005 * [ WFC: Legal Character ]
2006 * Characters referred to using character references must match the
2007 * production for Char.
2008 *
2009 * Returns the value parsed (as an int), 0 in case of error, str will be
2010 * updated to the current value of the index
2011 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002012static int
Owen Taylor3473f882001-02-23 17:55:21 +00002013xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2014 const xmlChar *ptr;
2015 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002016 unsigned int val = 0;
2017 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002018
2019 if ((str == NULL) || (*str == NULL)) return(0);
2020 ptr = *str;
2021 cur = *ptr;
2022 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2023 ptr += 3;
2024 cur = *ptr;
2025 while (cur != ';') { /* Non input consuming loop */
2026 if ((cur >= '0') && (cur <= '9'))
2027 val = val * 16 + (cur - '0');
2028 else if ((cur >= 'a') && (cur <= 'f'))
2029 val = val * 16 + (cur - 'a') + 10;
2030 else if ((cur >= 'A') && (cur <= 'F'))
2031 val = val * 16 + (cur - 'A') + 10;
2032 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002033 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002034 val = 0;
2035 break;
2036 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002037 if (val > 0x10FFFF)
2038 outofrange = val;
2039
Owen Taylor3473f882001-02-23 17:55:21 +00002040 ptr++;
2041 cur = *ptr;
2042 }
2043 if (cur == ';')
2044 ptr++;
2045 } else if ((cur == '&') && (ptr[1] == '#')){
2046 ptr += 2;
2047 cur = *ptr;
2048 while (cur != ';') { /* Non input consuming loops */
2049 if ((cur >= '0') && (cur <= '9'))
2050 val = val * 10 + (cur - '0');
2051 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002052 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002053 val = 0;
2054 break;
2055 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002056 if (val > 0x10FFFF)
2057 outofrange = val;
2058
Owen Taylor3473f882001-02-23 17:55:21 +00002059 ptr++;
2060 cur = *ptr;
2061 }
2062 if (cur == ';')
2063 ptr++;
2064 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002065 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002066 return(0);
2067 }
2068 *str = ptr;
2069
2070 /*
2071 * [ WFC: Legal Character ]
2072 * Characters referred to using character references must match the
2073 * production for Char.
2074 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002075 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002076 return(val);
2077 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002078 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2079 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2080 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002081 }
2082 return(0);
2083}
2084
2085/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002086 * xmlNewBlanksWrapperInputStream:
2087 * @ctxt: an XML parser context
2088 * @entity: an Entity pointer
2089 *
2090 * Create a new input stream for wrapping
2091 * blanks around a PEReference
2092 *
2093 * Returns the new input stream or NULL
2094 */
2095
2096static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2097
Daniel Veillardf4862f02002-09-10 11:13:43 +00002098static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002099xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2100 xmlParserInputPtr input;
2101 xmlChar *buffer;
2102 size_t length;
2103 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002104 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2105 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002106 return(NULL);
2107 }
2108 if (xmlParserDebugEntities)
2109 xmlGenericError(xmlGenericErrorContext,
2110 "new blanks wrapper for entity: %s\n", entity->name);
2111 input = xmlNewInputStream(ctxt);
2112 if (input == NULL) {
2113 return(NULL);
2114 }
2115 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002116 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002117 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002118 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002119 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002120 return(NULL);
2121 }
2122 buffer [0] = ' ';
2123 buffer [1] = '%';
2124 buffer [length-3] = ';';
2125 buffer [length-2] = ' ';
2126 buffer [length-1] = 0;
2127 memcpy(buffer + 2, entity->name, length - 5);
2128 input->free = deallocblankswrapper;
2129 input->base = buffer;
2130 input->cur = buffer;
2131 input->length = length;
2132 input->end = &buffer[length];
2133 return(input);
2134}
2135
2136/**
Owen Taylor3473f882001-02-23 17:55:21 +00002137 * xmlParserHandlePEReference:
2138 * @ctxt: the parser context
2139 *
2140 * [69] PEReference ::= '%' Name ';'
2141 *
2142 * [ WFC: No Recursion ]
2143 * A parsed entity must not contain a recursive
2144 * reference to itself, either directly or indirectly.
2145 *
2146 * [ WFC: Entity Declared ]
2147 * In a document without any DTD, a document with only an internal DTD
2148 * subset which contains no parameter entity references, or a document
2149 * with "standalone='yes'", ... ... The declaration of a parameter
2150 * entity must precede any reference to it...
2151 *
2152 * [ VC: Entity Declared ]
2153 * In a document with an external subset or external parameter entities
2154 * with "standalone='no'", ... ... The declaration of a parameter entity
2155 * must precede any reference to it...
2156 *
2157 * [ WFC: In DTD ]
2158 * Parameter-entity references may only appear in the DTD.
2159 * NOTE: misleading but this is handled.
2160 *
2161 * A PEReference may have been detected in the current input stream
2162 * the handling is done accordingly to
2163 * http://www.w3.org/TR/REC-xml#entproc
2164 * i.e.
2165 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002166 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002167 */
2168void
2169xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002170 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 xmlEntityPtr entity = NULL;
2172 xmlParserInputPtr input;
2173
Owen Taylor3473f882001-02-23 17:55:21 +00002174 if (RAW != '%') return;
2175 switch(ctxt->instate) {
2176 case XML_PARSER_CDATA_SECTION:
2177 return;
2178 case XML_PARSER_COMMENT:
2179 return;
2180 case XML_PARSER_START_TAG:
2181 return;
2182 case XML_PARSER_END_TAG:
2183 return;
2184 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002185 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002186 return;
2187 case XML_PARSER_PROLOG:
2188 case XML_PARSER_START:
2189 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002190 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002191 return;
2192 case XML_PARSER_ENTITY_DECL:
2193 case XML_PARSER_CONTENT:
2194 case XML_PARSER_ATTRIBUTE_VALUE:
2195 case XML_PARSER_PI:
2196 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002197 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002198 /* we just ignore it there */
2199 return;
2200 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002201 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002202 return;
2203 case XML_PARSER_ENTITY_VALUE:
2204 /*
2205 * NOTE: in the case of entity values, we don't do the
2206 * substitution here since we need the literal
2207 * entity value to be able to save the internal
2208 * subset of the document.
2209 * This will be handled by xmlStringDecodeEntities
2210 */
2211 return;
2212 case XML_PARSER_DTD:
2213 /*
2214 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2215 * In the internal DTD subset, parameter-entity references
2216 * can occur only where markup declarations can occur, not
2217 * within markup declarations.
2218 * In that case this is handled in xmlParseMarkupDecl
2219 */
2220 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2221 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002222 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002223 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002224 break;
2225 case XML_PARSER_IGNORE:
2226 return;
2227 }
2228
2229 NEXT;
2230 name = xmlParseName(ctxt);
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002233 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002234 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002235 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002236 } else {
2237 if (RAW == ';') {
2238 NEXT;
2239 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2240 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2241 if (entity == NULL) {
2242
2243 /*
2244 * [ WFC: Entity Declared ]
2245 * In a document without any DTD, a document with only an
2246 * internal DTD subset which contains no parameter entity
2247 * references, or a document with "standalone='yes'", ...
2248 * ... The declaration of a parameter entity must precede
2249 * any reference to it...
2250 */
2251 if ((ctxt->standalone == 1) ||
2252 ((ctxt->hasExternalSubset == 0) &&
2253 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002254 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002255 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002256 } else {
2257 /*
2258 * [ VC: Entity Declared ]
2259 * In a document with an external subset or external
2260 * parameter entities with "standalone='no'", ...
2261 * ... The declaration of a parameter entity must precede
2262 * any reference to it...
2263 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002264 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2265 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2266 "PEReference: %%%s; not found\n",
2267 name);
2268 } else
2269 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2270 "PEReference: %%%s; not found\n",
2271 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272 ctxt->valid = 0;
2273 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002274 } else if (ctxt->input->free != deallocblankswrapper) {
2275 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2276 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002277 } else {
2278 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2279 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002280 xmlChar start[4];
2281 xmlCharEncoding enc;
2282
Owen Taylor3473f882001-02-23 17:55:21 +00002283 /*
2284 * handle the extra spaces added before and after
2285 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002286 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002287 */
2288 input = xmlNewEntityInputStream(ctxt, entity);
2289 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002290
2291 /*
2292 * Get the 4 first bytes and decode the charset
2293 * if enc != XML_CHAR_ENCODING_NONE
2294 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002295 * Note that, since we may have some non-UTF8
2296 * encoding (like UTF16, bug 135229), the 'length'
2297 * is not known, but we can calculate based upon
2298 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002299 */
2300 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002301 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002302 start[0] = RAW;
2303 start[1] = NXT(1);
2304 start[2] = NXT(2);
2305 start[3] = NXT(3);
2306 enc = xmlDetectCharEncoding(start, 4);
2307 if (enc != XML_CHAR_ENCODING_NONE) {
2308 xmlSwitchEncoding(ctxt, enc);
2309 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002310 }
2311
Owen Taylor3473f882001-02-23 17:55:21 +00002312 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002313 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2314 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002315 xmlParseTextDecl(ctxt);
2316 }
Owen Taylor3473f882001-02-23 17:55:21 +00002317 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002318 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2319 "PEReference: %s is not a parameter entity\n",
2320 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002321 }
2322 }
2323 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002324 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002325 }
Owen Taylor3473f882001-02-23 17:55:21 +00002326 }
2327}
2328
/*
 * growBuffer:
 * @buffer:  name of an xmlChar * variable; a companion int variable named
 *           <buffer>_size must be in scope as well
 *
 * Doubles <buffer>_size and reallocates @buffer to the new size.
 * On allocation failure control jumps to a "mem_error" label that the
 * enclosing function has to provide; @buffer itself is left untouched in
 * that case (only <buffer>_size has already been doubled).
 */
#define growBuffer(buffer) {                                            \
    xmlChar *resized;                                                   \
    buffer##_size = buffer##_size * 2;                                  \
    resized = (xmlChar *) xmlRealloc(buffer,                            \
                                     buffer##_size * sizeof(xmlChar));  \
    if (resized == NULL)                                                \
        goto mem_error;                                                 \
    buffer = resized;                                                   \
}
2340
2341/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002342 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002343 * @ctxt: the parser context
2344 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002346 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2347 * @end: an end marker xmlChar, 0 if none
2348 * @end2: an end marker xmlChar, 0 if none
2349 * @end3: an end marker xmlChar, 0 if none
2350 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002351 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002352 *
2353 * [67] Reference ::= EntityRef | CharRef
2354 *
2355 * [69] PEReference ::= '%' Name ';'
2356 *
2357 * Returns A newly allocated string with the substitution done. The caller
2358 * must deallocate it !
2359 */
2360xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002361xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2362 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002363 xmlChar *buffer = NULL;
2364 int buffer_size = 0;
2365
2366 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002367 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002368 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002369 xmlEntityPtr ent;
2370 int c,l;
2371 int nbchars = 0;
2372
Daniel Veillarda82b1822004-11-08 16:24:57 +00002373 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002374 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002375 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002378 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002379 return(NULL);
2380 }
2381
2382 /*
2383 * allocate a translation buffer.
2384 */
2385 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002386 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002387 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002388
2389 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002390 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002391 * we are operating on already parsed values.
2392 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002393 if (str < last)
2394 c = CUR_SCHAR(str, l);
2395 else
2396 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002397 while ((c != 0) && (c != end) && /* non input consuming loop */
2398 (c != end2) && (c != end3)) {
2399
2400 if (c == 0) break;
2401 if ((c == '&') && (str[1] == '#')) {
2402 int val = xmlParseStringCharRef(ctxt, &str);
2403 if (val != 0) {
2404 COPY_BUF(0,buffer,nbchars,val);
2405 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002406 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2407 growBuffer(buffer);
2408 }
Owen Taylor3473f882001-02-23 17:55:21 +00002409 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2410 if (xmlParserDebugEntities)
2411 xmlGenericError(xmlGenericErrorContext,
2412 "String decoding Entity Reference: %.30s\n",
2413 str);
2414 ent = xmlParseStringEntityRef(ctxt, &str);
2415 if ((ent != NULL) &&
2416 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2417 if (ent->content != NULL) {
2418 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002419 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2420 growBuffer(buffer);
2421 }
Owen Taylor3473f882001-02-23 17:55:21 +00002422 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002423 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2424 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002425 }
2426 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002427 ctxt->depth++;
2428 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2429 0, 0, 0);
2430 ctxt->depth--;
2431 if (rep != NULL) {
2432 current = rep;
2433 while (*current != 0) { /* non input consuming loop */
2434 buffer[nbchars++] = *current++;
2435 if (nbchars >
2436 buffer_size - XML_PARSER_BUFFER_SIZE) {
2437 growBuffer(buffer);
2438 }
2439 }
2440 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002441 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002442 }
2443 } else if (ent != NULL) {
2444 int i = xmlStrlen(ent->name);
2445 const xmlChar *cur = ent->name;
2446
2447 buffer[nbchars++] = '&';
2448 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2449 growBuffer(buffer);
2450 }
2451 for (;i > 0;i--)
2452 buffer[nbchars++] = *cur++;
2453 buffer[nbchars++] = ';';
2454 }
2455 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2456 if (xmlParserDebugEntities)
2457 xmlGenericError(xmlGenericErrorContext,
2458 "String decoding PE Reference: %.30s\n", str);
2459 ent = xmlParseStringPEReference(ctxt, &str);
2460 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002461 if (ent->content == NULL) {
2462 if (xmlLoadEntityContent(ctxt, ent) < 0) {
2463 }
2464 }
Owen Taylor3473f882001-02-23 17:55:21 +00002465 ctxt->depth++;
2466 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2467 0, 0, 0);
2468 ctxt->depth--;
2469 if (rep != NULL) {
2470 current = rep;
2471 while (*current != 0) { /* non input consuming loop */
2472 buffer[nbchars++] = *current++;
2473 if (nbchars >
2474 buffer_size - XML_PARSER_BUFFER_SIZE) {
2475 growBuffer(buffer);
2476 }
2477 }
2478 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002479 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002480 }
2481 }
2482 } else {
2483 COPY_BUF(l,buffer,nbchars,c);
2484 str += l;
2485 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2486 growBuffer(buffer);
2487 }
2488 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002489 if (str < last)
2490 c = CUR_SCHAR(str, l);
2491 else
2492 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002493 }
2494 buffer[nbchars++] = 0;
2495 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002496
2497mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002498 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002499 if (rep != NULL)
2500 xmlFree(rep);
2501 if (buffer != NULL)
2502 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002503 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002504}
2505
Daniel Veillarde57ec792003-09-10 10:50:59 +00002506/**
2507 * xmlStringDecodeEntities:
2508 * @ctxt: the parser context
2509 * @str: the input string
2510 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2511 * @end: an end marker xmlChar, 0 if none
2512 * @end2: an end marker xmlChar, 0 if none
2513 * @end3: an end marker xmlChar, 0 if none
2514 *
2515 * Takes a entity string content and process to do the adequate substitutions.
2516 *
2517 * [67] Reference ::= EntityRef | CharRef
2518 *
2519 * [69] PEReference ::= '%' Name ';'
2520 *
2521 * Returns A newly allocated string with the substitution done. The caller
2522 * must deallocate it !
2523 */
2524xmlChar *
2525xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2526 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002527 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002528 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2529 end, end2, end3));
2530}
Owen Taylor3473f882001-02-23 17:55:21 +00002531
2532/************************************************************************
2533 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002534 * Commodity functions, cleanup needed ? *
2535 * *
2536 ************************************************************************/
2537
2538/**
2539 * areBlanks:
2540 * @ctxt: an XML parser context
2541 * @str: a xmlChar *
2542 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002543 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002544 *
2545 * Is this a sequence of blank chars that one can ignore ?
2546 *
2547 * Returns 1 if ignorable 0 otherwise.
2548 */
2549
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002550static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2551 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002552 int i, ret;
2553 xmlNodePtr lastChild;
2554
Daniel Veillard05c13a22001-09-09 08:38:09 +00002555 /*
2556 * Don't spend time trying to differentiate them, the same callback is
2557 * used !
2558 */
2559 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002560 return(0);
2561
Owen Taylor3473f882001-02-23 17:55:21 +00002562 /*
2563 * Check for xml:space value.
2564 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002565 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2566 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002567 return(0);
2568
2569 /*
2570 * Check that the string is made of blanks
2571 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002572 if (blank_chars == 0) {
2573 for (i = 0;i < len;i++)
2574 if (!(IS_BLANK_CH(str[i]))) return(0);
2575 }
Owen Taylor3473f882001-02-23 17:55:21 +00002576
2577 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002578 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002579 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002580 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002581 if (ctxt->myDoc != NULL) {
2582 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2583 if (ret == 0) return(1);
2584 if (ret == 1) return(0);
2585 }
2586
2587 /*
2588 * Otherwise, heuristic :-\
2589 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002590 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002591 if ((ctxt->node->children == NULL) &&
2592 (RAW == '<') && (NXT(1) == '/')) return(0);
2593
2594 lastChild = xmlGetLastChild(ctxt->node);
2595 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002596 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2597 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002598 } else if (xmlNodeIsText(lastChild))
2599 return(0);
2600 else if ((ctxt->node->children != NULL) &&
2601 (xmlNodeIsText(ctxt->node->children)))
2602 return(0);
2603 return(1);
2604}
2605
Owen Taylor3473f882001-02-23 17:55:21 +00002606/************************************************************************
2607 * *
2608 * Extra stuff for namespace support *
2609 * Relates to http://www.w3.org/TR/WD-xml-names *
2610 * *
2611 ************************************************************************/
2612
2613/**
2614 * xmlSplitQName:
2615 * @ctxt: an XML parser context
2616 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002617 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002618 *
2619 * parse an UTF8 encoded XML qualified name string
2620 *
2621 * [NS 5] QName ::= (Prefix ':')? LocalPart
2622 *
2623 * [NS 6] Prefix ::= NCName
2624 *
2625 * [NS 7] LocalPart ::= NCName
2626 *
2627 * Returns the local part, and prefix is updated
2628 * to get the Prefix if any.
2629 */
2630
2631xmlChar *
2632xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2633 xmlChar buf[XML_MAX_NAMELEN + 5];
2634 xmlChar *buffer = NULL;
2635 int len = 0;
2636 int max = XML_MAX_NAMELEN;
2637 xmlChar *ret = NULL;
2638 const xmlChar *cur = name;
2639 int c;
2640
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002641 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002642 *prefix = NULL;
2643
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002644 if (cur == NULL) return(NULL);
2645
Owen Taylor3473f882001-02-23 17:55:21 +00002646#ifndef XML_XML_NAMESPACE
2647 /* xml: prefix is not really a namespace */
2648 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2649 (cur[2] == 'l') && (cur[3] == ':'))
2650 return(xmlStrdup(name));
2651#endif
2652
Daniel Veillard597bc482003-07-24 16:08:28 +00002653 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002654 if (cur[0] == ':')
2655 return(xmlStrdup(name));
2656
2657 c = *cur++;
2658 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2659 buf[len++] = c;
2660 c = *cur++;
2661 }
2662 if (len >= max) {
2663 /*
2664 * Okay someone managed to make a huge name, so he's ready to pay
2665 * for the processing speed.
2666 */
2667 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002668
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002669 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002670 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002671 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002672 return(NULL);
2673 }
2674 memcpy(buffer, buf, len);
2675 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2676 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002677 xmlChar *tmp;
2678
Owen Taylor3473f882001-02-23 17:55:21 +00002679 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002680 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002681 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002682 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002683 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002684 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002685 return(NULL);
2686 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002687 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002688 }
2689 buffer[len++] = c;
2690 c = *cur++;
2691 }
2692 buffer[len] = 0;
2693 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002694
Daniel Veillard597bc482003-07-24 16:08:28 +00002695 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002696 if (buffer != NULL)
2697 xmlFree(buffer);
2698 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002699 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002700 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002701
Owen Taylor3473f882001-02-23 17:55:21 +00002702 if (buffer == NULL)
2703 ret = xmlStrndup(buf, len);
2704 else {
2705 ret = buffer;
2706 buffer = NULL;
2707 max = XML_MAX_NAMELEN;
2708 }
2709
2710
2711 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002712 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002714 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002715 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002716 }
Owen Taylor3473f882001-02-23 17:55:21 +00002717 len = 0;
2718
Daniel Veillardbb284f42002-10-16 18:02:47 +00002719 /*
2720 * Check that the first character is proper to start
2721 * a new name
2722 */
2723 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2724 ((c >= 0x41) && (c <= 0x5A)) ||
2725 (c == '_') || (c == ':'))) {
2726 int l;
2727 int first = CUR_SCHAR(cur, l);
2728
2729 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002730 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002731 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002732 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002733 }
2734 }
2735 cur++;
2736
Owen Taylor3473f882001-02-23 17:55:21 +00002737 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2738 buf[len++] = c;
2739 c = *cur++;
2740 }
2741 if (len >= max) {
2742 /*
2743 * Okay someone managed to make a huge name, so he's ready to pay
2744 * for the processing speed.
2745 */
2746 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002747
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002748 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002749 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002750 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002751 return(NULL);
2752 }
2753 memcpy(buffer, buf, len);
2754 while (c != 0) { /* tested bigname2.xml */
2755 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 xmlChar *tmp;
2757
Owen Taylor3473f882001-02-23 17:55:21 +00002758 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002759 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002760 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002761 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002762 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002763 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002764 return(NULL);
2765 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002766 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002767 }
2768 buffer[len++] = c;
2769 c = *cur++;
2770 }
2771 buffer[len] = 0;
2772 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002773
Owen Taylor3473f882001-02-23 17:55:21 +00002774 if (buffer == NULL)
2775 ret = xmlStrndup(buf, len);
2776 else {
2777 ret = buffer;
2778 }
2779 }
2780
2781 return(ret);
2782}
2783
2784/************************************************************************
2785 * *
2786 * The parser itself *
2787 * Relates to http://www.w3.org/TR/REC-xml *
2788 * *
2789 ************************************************************************/
2790
/************************************************************************
 *									*
 *		Routines to parse Name, NCName and NmToken		*
 *									*
 ************************************************************************/

/*
 * Call counters for the name parsing routines. They have static storage
 * duration and therefore start at zero.
 */
unsigned long nbParseName;
unsigned long nbParseNmToken;
unsigned long nbParseNCName;
unsigned long nbParseNCNameComplex;
unsigned long nbParseNameComplex;
unsigned long nbParseStringName;
2802/*
2803 * The two following functions are related to the change of accepted
2804 * characters for Name and NmToken in the Revision 5 of XML-1.0
2805 * They correspond to the modified production [4] and the new production [4a]
2806 * changes in that revision. Also note that the macros used for the
2807 * productions Letter, Digit, CombiningChar and Extender are not needed
2808 * anymore.
2809 * We still keep compatibility to pre-revision5 parsing semantic if the
2810 * new XML_PARSE_OLD10 option is given to the parser.
2811 */
2812static int
2813xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2814 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2815 /*
2816 * Use the new checks of production [4] [4a] amd [5] of the
2817 * Update 5 of XML-1.0
2818 */
2819 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2820 (((c >= 'a') && (c <= 'z')) ||
2821 ((c >= 'A') && (c <= 'Z')) ||
2822 (c == '_') || (c == ':') ||
2823 ((c >= 0xC0) && (c <= 0xD6)) ||
2824 ((c >= 0xD8) && (c <= 0xF6)) ||
2825 ((c >= 0xF8) && (c <= 0x2FF)) ||
2826 ((c >= 0x370) && (c <= 0x37D)) ||
2827 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2828 ((c >= 0x200C) && (c <= 0x200D)) ||
2829 ((c >= 0x2070) && (c <= 0x218F)) ||
2830 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2831 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2832 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2833 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2834 ((c >= 0x10000) && (c <= 0xEFFFF))))
2835 return(1);
2836 } else {
2837 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2838 return(1);
2839 }
2840 return(0);
2841}
2842
2843static int
2844xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2845 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2846 /*
2847 * Use the new checks of production [4] [4a] amd [5] of the
2848 * Update 5 of XML-1.0
2849 */
2850 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2851 (((c >= 'a') && (c <= 'z')) ||
2852 ((c >= 'A') && (c <= 'Z')) ||
2853 ((c >= '0') && (c <= '9')) || /* !start */
2854 (c == '_') || (c == ':') ||
2855 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2856 ((c >= 0xC0) && (c <= 0xD6)) ||
2857 ((c >= 0xD8) && (c <= 0xF6)) ||
2858 ((c >= 0xF8) && (c <= 0x2FF)) ||
2859 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2860 ((c >= 0x370) && (c <= 0x37D)) ||
2861 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2862 ((c >= 0x200C) && (c <= 0x200D)) ||
2863 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2864 ((c >= 0x2070) && (c <= 0x218F)) ||
2865 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2866 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2867 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2868 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2869 ((c >= 0x10000) && (c <= 0xEFFFF))))
2870 return(1);
2871 } else {
2872 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2873 (c == '.') || (c == '-') ||
2874 (c == '_') || (c == ':') ||
2875 (IS_COMBINING(c)) ||
2876 (IS_EXTENDER(c)))
2877 return(1);
2878 }
2879 return(0);
2880}
2881
Daniel Veillarde57ec792003-09-10 10:50:59 +00002882static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002883 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002884
Daniel Veillard34e3f642008-07-29 09:02:27 +00002885static const xmlChar *
2886xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2887 int len = 0, l;
2888 int c;
2889 int count = 0;
2890
2891 nbParseNameComplex++;
2892
2893 /*
2894 * Handler for more complex cases
2895 */
2896 GROW;
2897 c = CUR_CHAR(l);
2898 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2899 /*
2900 * Use the new checks of production [4] [4a] amd [5] of the
2901 * Update 5 of XML-1.0
2902 */
2903 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2904 (!(((c >= 'a') && (c <= 'z')) ||
2905 ((c >= 'A') && (c <= 'Z')) ||
2906 (c == '_') || (c == ':') ||
2907 ((c >= 0xC0) && (c <= 0xD6)) ||
2908 ((c >= 0xD8) && (c <= 0xF6)) ||
2909 ((c >= 0xF8) && (c <= 0x2FF)) ||
2910 ((c >= 0x370) && (c <= 0x37D)) ||
2911 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2912 ((c >= 0x200C) && (c <= 0x200D)) ||
2913 ((c >= 0x2070) && (c <= 0x218F)) ||
2914 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2915 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2916 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2917 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2918 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
2919 return(NULL);
2920 }
2921 len += l;
2922 NEXTL(l);
2923 c = CUR_CHAR(l);
2924 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2925 (((c >= 'a') && (c <= 'z')) ||
2926 ((c >= 'A') && (c <= 'Z')) ||
2927 ((c >= '0') && (c <= '9')) || /* !start */
2928 (c == '_') || (c == ':') ||
2929 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2930 ((c >= 0xC0) && (c <= 0xD6)) ||
2931 ((c >= 0xD8) && (c <= 0xF6)) ||
2932 ((c >= 0xF8) && (c <= 0x2FF)) ||
2933 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2934 ((c >= 0x370) && (c <= 0x37D)) ||
2935 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2936 ((c >= 0x200C) && (c <= 0x200D)) ||
2937 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2938 ((c >= 0x2070) && (c <= 0x218F)) ||
2939 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2940 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2941 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2942 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2943 ((c >= 0x10000) && (c <= 0xEFFFF))
2944 )) {
2945 if (count++ > 100) {
2946 count = 0;
2947 GROW;
2948 }
2949 len += l;
2950 NEXTL(l);
2951 c = CUR_CHAR(l);
2952 }
2953 } else {
2954 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2955 (!IS_LETTER(c) && (c != '_') &&
2956 (c != ':'))) {
2957 return(NULL);
2958 }
2959 len += l;
2960 NEXTL(l);
2961 c = CUR_CHAR(l);
2962
2963 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
2964 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2965 (c == '.') || (c == '-') ||
2966 (c == '_') || (c == ':') ||
2967 (IS_COMBINING(c)) ||
2968 (IS_EXTENDER(c)))) {
2969 if (count++ > 100) {
2970 count = 0;
2971 GROW;
2972 }
2973 len += l;
2974 NEXTL(l);
2975 c = CUR_CHAR(l);
2976 }
2977 }
2978 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2979 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
2980 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
2981}
2982
Owen Taylor3473f882001-02-23 17:55:21 +00002983/**
2984 * xmlParseName:
2985 * @ctxt: an XML parser context
2986 *
2987 * parse an XML name.
2988 *
2989 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2990 * CombiningChar | Extender
2991 *
2992 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2993 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002994 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002995 *
2996 * Returns the Name parsed or NULL
2997 */
2998
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002999const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003000xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003001 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003002 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003003 int count = 0;
3004
3005 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003006
Daniel Veillard34e3f642008-07-29 09:02:27 +00003007 nbParseName++;
3008
Daniel Veillard48b2f892001-02-25 16:11:03 +00003009 /*
3010 * Accelerator for simple ASCII names
3011 */
3012 in = ctxt->input->cur;
3013 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3014 ((*in >= 0x41) && (*in <= 0x5A)) ||
3015 (*in == '_') || (*in == ':')) {
3016 in++;
3017 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3018 ((*in >= 0x41) && (*in <= 0x5A)) ||
3019 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003020 (*in == '_') || (*in == '-') ||
3021 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003022 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003023 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003024 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003025 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003026 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003027 ctxt->nbChars += count;
3028 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003029 if (ret == NULL)
3030 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003031 return(ret);
3032 }
3033 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003034 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003035 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003036}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003037
Daniel Veillard34e3f642008-07-29 09:02:27 +00003038static const xmlChar *
3039xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3040 int len = 0, l;
3041 int c;
3042 int count = 0;
3043
3044 nbParseNCNameComplex++;
3045
3046 /*
3047 * Handler for more complex cases
3048 */
3049 GROW;
3050 c = CUR_CHAR(l);
3051 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3052 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3053 return(NULL);
3054 }
3055
3056 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3057 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3058 if (count++ > 100) {
3059 count = 0;
3060 GROW;
3061 }
3062 len += l;
3063 NEXTL(l);
3064 c = CUR_CHAR(l);
3065 }
3066 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3067}
3068
3069/**
3070 * xmlParseNCName:
3071 * @ctxt: an XML parser context
3072 * @len: lenght of the string parsed
3073 *
3074 * parse an XML name.
3075 *
3076 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3077 * CombiningChar | Extender
3078 *
3079 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3080 *
3081 * Returns the Name parsed or NULL
3082 */
3083
3084static const xmlChar *
3085xmlParseNCName(xmlParserCtxtPtr ctxt) {
3086 const xmlChar *in;
3087 const xmlChar *ret;
3088 int count = 0;
3089
3090 nbParseNCName++;
3091
3092 /*
3093 * Accelerator for simple ASCII names
3094 */
3095 in = ctxt->input->cur;
3096 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3097 ((*in >= 0x41) && (*in <= 0x5A)) ||
3098 (*in == '_')) {
3099 in++;
3100 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3101 ((*in >= 0x41) && (*in <= 0x5A)) ||
3102 ((*in >= 0x30) && (*in <= 0x39)) ||
3103 (*in == '_') || (*in == '-') ||
3104 (*in == '.'))
3105 in++;
3106 if ((*in > 0) && (*in < 0x80)) {
3107 count = in - ctxt->input->cur;
3108 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3109 ctxt->input->cur = in;
3110 ctxt->nbChars += count;
3111 ctxt->input->col += count;
3112 if (ret == NULL) {
3113 xmlErrMemory(ctxt, NULL);
3114 }
3115 return(ret);
3116 }
3117 }
3118 return(xmlParseNCNameComplex(ctxt));
3119}
3120
Daniel Veillard46de64e2002-05-29 08:21:33 +00003121/**
3122 * xmlParseNameAndCompare:
3123 * @ctxt: an XML parser context
3124 *
3125 * parse an XML name and compares for match
3126 * (specialized for endtag parsing)
3127 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003128 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3129 * and the name for mismatch
3130 */
3131
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003132static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003133xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003134 register const xmlChar *cmp = other;
3135 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003136 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003137
3138 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139
Daniel Veillard46de64e2002-05-29 08:21:33 +00003140 in = ctxt->input->cur;
3141 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003142 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003143 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003144 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003145 }
William M. Brack76e95df2003-10-18 16:20:14 +00003146 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003147 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003148 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003149 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003150 }
3151 /* failure (or end of input buffer), check with full function */
3152 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003153 /* strings coming from the dictionnary direct compare possible */
3154 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003155 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003156 }
3157 return ret;
3158}
3159
/**
 * xmlParseStringName:
 * @ctxt:  an XML parser context
 * @str:  a pointer to the string pointer (IN/OUT)
 *
 * parse an XML name from an in-memory string rather than from the
 * parser input.
 *
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
 *                  CombiningChar | Extender
 *
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
 *
 * [6] Names ::= Name (#x20 Name)*
 *
 * Returns the Name parsed or NULL. The @str pointer
 * is updated to the current location in the string. The returned
 * string is newly allocated. When NULL is returned (no name start
 * character, or allocation failure) @str is left unchanged.
 */

static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
    /* NOTE(review): the +5 slack presumably leaves room for one more
     * multi-byte character written by COPY_BUF before the length test
     * below - confirm against the COPY_BUF definition. */
    xmlChar buf[XML_MAX_NAMELEN + 5];
    const xmlChar *cur = *str;
    int len = 0, l;
    int c;

    nbParseStringName++;

    c = CUR_SCHAR(cur, l);
    if (!xmlIsNameStartChar(ctxt, c)) {
        return(NULL);
    }

    COPY_BUF(l,buf,len,c);
    cur += l;
    c = CUR_SCHAR(cur, l);
    while (xmlIsNameChar(ctxt, c)) {
        COPY_BUF(l,buf,len,c);
        cur += l;
        c = CUR_SCHAR(cur, l);
        if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             * The stack buffer is full: switch to a heap buffer that is
             * doubled on demand, and finish the name there.
             */
            xmlChar *buffer;
            int max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (xmlIsNameChar(ctxt, c)) {
                /* keep 10 bytes of headroom for the next character and
                 * the terminating 0 */
                if (len + 10 > max) {
                    xmlChar *tmp;
                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                COPY_BUF(l,buffer,len,c);
                cur += l;
                c = CUR_SCHAR(cur, l);
            }
            buffer[len] = 0;
            *str = cur;
            return(buffer);
        }
    }
    /* short name: everything fitted in the stack buffer */
    *str = cur;
    return(xmlStrndup(buf, len));
}
3238
/**
 * xmlParseNmtoken:
 * @ctxt:  an XML parser context
 *
 * parse an XML Nmtoken from the current parser input.
 *
 * [7] Nmtoken ::= (NameChar)+
 *
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
 *
 * Returns the Nmtoken parsed or NULL. NULL is returned when no name
 * character is found at the current position or when an allocation
 * fails. A non-NULL result is a newly allocated string.
 */

xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
    xmlChar buf[XML_MAX_NAMELEN + 5];
    int len = 0, l;
    int c;
    int count = 0;

    nbParseNmToken++;

    GROW;
    c = CUR_CHAR(l);

    while (xmlIsNameChar(ctxt, c)) {
        /* GROW is invoked again only once every ~100 characters */
        if (count++ > 100) {
            count = 0;
            GROW;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        c = CUR_CHAR(l);
        if (len >= XML_MAX_NAMELEN) {
            /*
             * Okay someone managed to make a huge token, so he's ready to pay
             * for the processing speed.
             * The stack buffer is full: continue in a heap buffer that is
             * doubled on demand.
             */
            xmlChar *buffer;
            int max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (xmlIsNameChar(ctxt, c)) {
                if (count++ > 100) {
                    count = 0;
                    GROW;
                }
                /* keep 10 bytes of headroom for the next character and
                 * the terminating 0 */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                COPY_BUF(l,buffer,len,c);
                NEXTL(l);
                c = CUR_CHAR(l);
            }
            buffer[len] = 0;
            return(buffer);
        }
    }
    /* an Nmtoken needs at least one NameChar */
    if (len == 0)
        return(NULL);
    return(xmlStrndup(buf, len));
}
3316
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *                     "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The literal is first copied verbatim into a buffer, then checked for
 * stray '%' and '&', and finally passed to xmlStringDecodeEntities()
 * with XML_SUBSTITUTE_PEREF.
 *
 * @orig is only written when the closing quote was found; in that case
 * it receives the raw buffer, which is then not freed here. On every
 * other path @orig is left untouched.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* the opening quote decides which quote terminates the literal */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* remember the input we started in, see the NOTE below */
    input = ctxt->input;
    GROW;
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while ((IS_CHAR(c)) && ((c != stop) || /* checked */
           (ctxt->input != input))) {
        /* keep room for one more character plus the terminating 0 */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);
        /*
         * Pop-up of finished entities.
         */
        while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
            xmlPopInput(ctxt);

        GROW;
        c = CUR_CHAR(l);
        /* a 0 may only mean the buffer ran dry: refill once and retry */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }
    }
    buf[len] = 0;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;

            cur++;
            /* a well-formed reference is: marker, Name, ';' */
            name = xmlParseStringName(ctxt, &cur);
            if ((name == NULL) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
            }
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
            }
            if (name != NULL)
                xmlFree(name);
            /* do not step past the terminating 0 with the cur++ below */
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     */
    if (c != stop) {
        /* the literal was never closed: drop the buffer, ret stays NULL */
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        xmlFree(buf);
    } else {
        NEXT;
        /*
         * NOTE: 4.4.7 Bypassed
         * When a general entity reference appears in the EntityValue in
         * an entity declaration, it is bypassed and left as is.
         * so XML_SUBSTITUTE_REF is not set here.
         */
        ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
                                      0, 0, 0);
        /* hand the raw buffer to the caller if requested, else free it */
        if (orig != NULL)
            *orig = buf;
        else
            xmlFree(buf);
    }

    return(ret);
}
3456
3457/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003458 * xmlParseAttValueComplex:
3459 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003460 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003461 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003462 *
3463 * parse a value for an attribute, this is the fallback function
3464 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003465 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003466 *
3467 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3468 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003469static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003470xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003471 xmlChar limit = 0;
3472 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003473 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003474 int len = 0;
3475 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003476 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003477 xmlChar *current = NULL;
3478 xmlEntityPtr ent;
3479
Owen Taylor3473f882001-02-23 17:55:21 +00003480 if (NXT(0) == '"') {
3481 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3482 limit = '"';
3483 NEXT;
3484 } else if (NXT(0) == '\'') {
3485 limit = '\'';
3486 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3487 NEXT;
3488 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003489 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003490 return(NULL);
3491 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003492
Owen Taylor3473f882001-02-23 17:55:21 +00003493 /*
3494 * allocate a translation buffer.
3495 */
3496 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003497 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003498 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003499
3500 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003501 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003502 */
3503 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003504 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003505 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003506 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003507 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003508 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003509 if (NXT(1) == '#') {
3510 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003511
Owen Taylor3473f882001-02-23 17:55:21 +00003512 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003513 if (ctxt->replaceEntities) {
3514 if (len > buf_size - 10) {
3515 growBuffer(buf);
3516 }
3517 buf[len++] = '&';
3518 } else {
3519 /*
3520 * The reparsing will be done in xmlStringGetNodeList()
3521 * called by the attribute() function in SAX.c
3522 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003523 if (len > buf_size - 10) {
3524 growBuffer(buf);
3525 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003526 buf[len++] = '&';
3527 buf[len++] = '#';
3528 buf[len++] = '3';
3529 buf[len++] = '8';
3530 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003531 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003532 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003533 if (len > buf_size - 10) {
3534 growBuffer(buf);
3535 }
Owen Taylor3473f882001-02-23 17:55:21 +00003536 len += xmlCopyChar(0, &buf[len], val);
3537 }
3538 } else {
3539 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003540 if ((ent != NULL) &&
3541 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3542 if (len > buf_size - 10) {
3543 growBuffer(buf);
3544 }
3545 if ((ctxt->replaceEntities == 0) &&
3546 (ent->content[0] == '&')) {
3547 buf[len++] = '&';
3548 buf[len++] = '#';
3549 buf[len++] = '3';
3550 buf[len++] = '8';
3551 buf[len++] = ';';
3552 } else {
3553 buf[len++] = ent->content[0];
3554 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003555 } else if ((ent != NULL) &&
3556 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003557 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3558 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003559 XML_SUBSTITUTE_REF,
3560 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003561 if (rep != NULL) {
3562 current = rep;
3563 while (*current != 0) { /* non input consuming */
3564 buf[len++] = *current++;
3565 if (len > buf_size - 10) {
3566 growBuffer(buf);
3567 }
3568 }
3569 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003570 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003571 }
3572 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003573 if (len > buf_size - 10) {
3574 growBuffer(buf);
3575 }
Owen Taylor3473f882001-02-23 17:55:21 +00003576 if (ent->content != NULL)
3577 buf[len++] = ent->content[0];
3578 }
3579 } else if (ent != NULL) {
3580 int i = xmlStrlen(ent->name);
3581 const xmlChar *cur = ent->name;
3582
3583 /*
3584 * This may look absurd but is needed to detect
3585 * entities problems
3586 */
3587 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3588 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003590 XML_SUBSTITUTE_REF, 0, 0, 0);
3591 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003592 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003593 rep = NULL;
3594 }
Owen Taylor3473f882001-02-23 17:55:21 +00003595 }
3596
3597 /*
3598 * Just output the reference
3599 */
3600 buf[len++] = '&';
3601 if (len > buf_size - i - 10) {
3602 growBuffer(buf);
3603 }
3604 for (;i > 0;i--)
3605 buf[len++] = *cur++;
3606 buf[len++] = ';';
3607 }
3608 }
3609 } else {
3610 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003611 if ((len != 0) || (!normalize)) {
3612 if ((!normalize) || (!in_space)) {
3613 COPY_BUF(l,buf,len,0x20);
3614 if (len > buf_size - 10) {
3615 growBuffer(buf);
3616 }
3617 }
3618 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003619 }
3620 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003621 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003622 COPY_BUF(l,buf,len,c);
3623 if (len > buf_size - 10) {
3624 growBuffer(buf);
3625 }
3626 }
3627 NEXTL(l);
3628 }
3629 GROW;
3630 c = CUR_CHAR(l);
3631 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003632 if ((in_space) && (normalize)) {
3633 while (buf[len - 1] == 0x20) len--;
3634 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003635 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003637 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003638 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003639 if ((c != 0) && (!IS_CHAR(c))) {
3640 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3641 "invalid character in attribute value\n");
3642 } else {
3643 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3644 "AttValue: ' expected\n");
3645 }
Owen Taylor3473f882001-02-23 17:55:21 +00003646 } else
3647 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003648 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003649 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003650
3651mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003652 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003653 if (buf != NULL)
3654 xmlFree(buf);
3655 if (rep != NULL)
3656 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003657 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003658}
3659
3660/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003661 * xmlParseAttValue:
3662 * @ctxt: an XML parser context
3663 *
3664 * parse a value for an attribute
3665 * Note: the parser won't do substitution of entities here, this
3666 * will be handled later in xmlStringGetNodeList
3667 *
3668 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3669 * "'" ([^<&'] | Reference)* "'"
3670 *
3671 * 3.3.3 Attribute-Value Normalization:
3672 * Before the value of an attribute is passed to the application or
3673 * checked for validity, the XML processor must normalize it as follows:
3674 * - a character reference is processed by appending the referenced
3675 * character to the attribute value
3676 * - an entity reference is processed by recursively processing the
3677 * replacement text of the entity
3678 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3679 * appending #x20 to the normalized value, except that only a single
3680 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3681 * parsed entity or the literal entity value of an internal parsed entity
3682 * - other characters are processed by appending them to the normalized value
3683 * If the declared value is not CDATA, then the XML processor must further
3684 * process the normalized attribute value by discarding any leading and
3685 * trailing space (#x20) characters, and by replacing sequences of space
3686 * (#x20) characters by a single space (#x20) character.
3687 * All attributes for which no declaration has been read should be treated
3688 * by a non-validating parser as if declared CDATA.
3689 *
3690 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3691 */
3692
3693
3694xmlChar *
3695xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003696 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003697 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003698}
3699
/**
 * xmlParseSystemLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML Literal
 *
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
 *
 * The parser state is switched to XML_PARSER_SYSTEM_LITERAL while the
 * literal is read and restored to its previous value afterwards.
 *
 * Returns the SystemLiteral parsed or NULL. NULL is returned when the
 * opening quote is missing or an allocation fails. If the literal is
 * not terminated an error is raised but the text read so far is still
 * returned. A non-NULL result is a newly allocated string.
 */

xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int cur, l;
    xmlChar stop;
    /* saved so that it can be restored once the literal has been read */
    int state = ctxt->instate;
    int count = 0;

    SHRINK;
    /* the opening quote decides which quote terminates the literal */
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }

    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
    cur = CUR_CHAR(l);
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
        /* keep room for one more character plus the terminating 0 */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                xmlFree(buf);
                xmlErrMemory(ctxt, NULL);
                ctxt->instate = (xmlParserInputState) state;
                return(NULL);
            }
            buf = tmp;
        }
        /* GROW is invoked only once every ~50 characters */
        count++;
        if (count > 50) {
            GROW;
            count = 0;
        }
        COPY_BUF(l,buf,len,cur);
        NEXTL(l);
        cur = CUR_CHAR(l);
        /* a 0 may only mean the buffer ran dry: refill once and retry */
        if (cur == 0) {
            GROW;
            SHRINK;
            cur = CUR_CHAR(l);
        }
    }
    buf[len] = 0;
    ctxt->instate = (xmlParserInputState) state;
    /* the loop ended either on the closing quote or on an invalid char */
    if (!IS_CHAR(cur)) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    return(buf);
}
3777
/**
 * xmlParsePubidLiteral:
 * @ctxt:  an XML parser context
 *
 * parse an XML public literal
 *
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
 *
 * The parser state is switched to XML_PARSER_PUBLIC_LITERAL while the
 * literal is read.
 *
 * Returns the PubidLiteral parsed or NULL. NULL is returned when the
 * opening quote is missing or an allocation fails. If the closing quote
 * is not found an error is raised but the text read so far is still
 * returned. A non-NULL result is a newly allocated string.
 */

xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    xmlChar cur;
    xmlChar stop;
    int count = 0;
    xmlParserInputState oldstate = ctxt->instate;

    SHRINK;
    /* the opening quote decides which quote terminates the literal */
    if (RAW == '"') {
        NEXT;
        stop = '"';
    } else if (RAW == '\'') {
        NEXT;
        stop = '\'';
    } else {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
    cur = CUR;
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
        /* PubidChars are copied one byte at a time: room for it + the 0 */
        if (len + 1 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                /* NOTE(review): this exit does not reset ctxt->instate to
                 * oldstate, unlike the normal exit below - confirm that
                 * this is intended after xmlErrMemory(). */
                xmlErrMemory(ctxt, NULL);
                xmlFree(buf);
                return(NULL);
            }
            buf = tmp;
        }
        buf[len++] = cur;
        /* GROW is invoked only once every ~50 characters */
        count++;
        if (count > 50) {
            GROW;
            count = 0;
        }
        NEXT;
        cur = CUR;
        /* a 0 may only mean the buffer ran dry: refill once and retry */
        if (cur == 0) {
            GROW;
            SHRINK;
            cur = CUR;
        }
    }
    buf[len] = 0;
    if (cur != stop) {
        xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
    } else {
        /* skip the closing quote */
        NEXT;
    }
    ctxt->instate = oldstate;
    return(buf);
}
3853
/*
 * Forward declaration so that the function can be referenced before its
 * definition, which is not part of this section of the file.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003855
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by byte value. A non-zero entry (the byte value
 * itself) means the fast scanning loop of xmlParseCharData() may step
 * over that byte; a zero entry stops the loop. Zero entries are: the
 * control characters other than 0x9 (including 0xA and 0xD, which are
 * handled separately), '&', '<', ']' and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3893
Owen Taylor3473f882001-02-23 17:55:21 +00003894/**
3895 * xmlParseCharData:
3896 * @ctxt: an XML parser context
3897 * @cdata: int indicating whether we are within a CDATA section
3898 *
3899 * parse a CharData section.
3900 * if we are within a CDATA section ']]>' marks an end of section.
3901 *
3902 * The right angle bracket (>) may be represented using the string "&gt;",
3903 * and must, for compatibility, be escaped using "&gt;" or a character
3904 * reference when it appears in the string "]]>" in content, when that
3905 * string is not marking the end of a CDATA section.
3906 *
3907 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3908 */
3909
3910void
3911xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003912 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003913 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003914 int line = ctxt->input->line;
3915 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003916 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003917
3918 SHRINK;
3919 GROW;
3920 /*
3921 * Accelerated common case where input don't need to be
3922 * modified before passing it to the handler.
3923 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003924 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003925 in = ctxt->input->cur;
3926 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003927get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003928 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003929 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003930 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003931 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003932 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003933 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003934 goto get_more_space;
3935 }
3936 if (*in == '<') {
3937 nbchar = in - ctxt->input->cur;
3938 if (nbchar > 0) {
3939 const xmlChar *tmp = ctxt->input->cur;
3940 ctxt->input->cur = in;
3941
Daniel Veillard34099b42004-11-04 17:34:35 +00003942 if ((ctxt->sax != NULL) &&
3943 (ctxt->sax->ignorableWhitespace !=
3944 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003945 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003946 if (ctxt->sax->ignorableWhitespace != NULL)
3947 ctxt->sax->ignorableWhitespace(ctxt->userData,
3948 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003949 } else {
3950 if (ctxt->sax->characters != NULL)
3951 ctxt->sax->characters(ctxt->userData,
3952 tmp, nbchar);
3953 if (*ctxt->space == -1)
3954 *ctxt->space = -2;
3955 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003956 } else if ((ctxt->sax != NULL) &&
3957 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003958 ctxt->sax->characters(ctxt->userData,
3959 tmp, nbchar);
3960 }
3961 }
3962 return;
3963 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003964
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003965get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003966 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003967 while (test_char_data[*in]) {
3968 in++;
3969 ccol++;
3970 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003971 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003972 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003973 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003974 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003975 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003976 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003977 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003978 }
3979 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003980 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003981 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003982 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003983 return;
3984 }
3985 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003986 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003987 goto get_more;
3988 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003989 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003990 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003991 if ((ctxt->sax != NULL) &&
3992 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003993 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003994 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003995 const xmlChar *tmp = ctxt->input->cur;
3996 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003997
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003998 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003999 if (ctxt->sax->ignorableWhitespace != NULL)
4000 ctxt->sax->ignorableWhitespace(ctxt->userData,
4001 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004002 } else {
4003 if (ctxt->sax->characters != NULL)
4004 ctxt->sax->characters(ctxt->userData,
4005 tmp, nbchar);
4006 if (*ctxt->space == -1)
4007 *ctxt->space = -2;
4008 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004009 line = ctxt->input->line;
4010 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004011 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004012 if (ctxt->sax->characters != NULL)
4013 ctxt->sax->characters(ctxt->userData,
4014 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004015 line = ctxt->input->line;
4016 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004017 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004018 }
4019 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004020 if (*in == 0xD) {
4021 in++;
4022 if (*in == 0xA) {
4023 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004024 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004025 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004026 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004027 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004028 in--;
4029 }
4030 if (*in == '<') {
4031 return;
4032 }
4033 if (*in == '&') {
4034 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004035 }
4036 SHRINK;
4037 GROW;
4038 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004039 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004040 nbchar = 0;
4041 }
Daniel Veillard50582112001-03-26 22:52:16 +00004042 ctxt->input->line = line;
4043 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004044 xmlParseCharDataComplex(ctxt, cdata);
4045}
4046
Daniel Veillard01c13b52002-12-10 15:19:08 +00004047/**
4048 * xmlParseCharDataComplex:
4049 * @ctxt: an XML parser context
4050 * @cdata: int indicating whether we are within a CDATA section
4051 *
4052 * parse a CharData section.this is the fallback function
4053 * of xmlParseCharData() when the parsing requires handling
4054 * of non-ASCII characters.
4055 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004056void
4057xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004058 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4059 int nbchar = 0;
4060 int cur, l;
4061 int count = 0;
4062
4063 SHRINK;
4064 GROW;
4065 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004066 while ((cur != '<') && /* checked */
4067 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004068 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004069 if ((cur == ']') && (NXT(1) == ']') &&
4070 (NXT(2) == '>')) {
4071 if (cdata) break;
4072 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004073 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004074 }
4075 }
4076 COPY_BUF(l,buf,nbchar,cur);
4077 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004078 buf[nbchar] = 0;
4079
Owen Taylor3473f882001-02-23 17:55:21 +00004080 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004081 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004082 */
4083 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004084 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004085 if (ctxt->sax->ignorableWhitespace != NULL)
4086 ctxt->sax->ignorableWhitespace(ctxt->userData,
4087 buf, nbchar);
4088 } else {
4089 if (ctxt->sax->characters != NULL)
4090 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004091 if ((ctxt->sax->characters !=
4092 ctxt->sax->ignorableWhitespace) &&
4093 (*ctxt->space == -1))
4094 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004095 }
4096 }
4097 nbchar = 0;
4098 }
4099 count++;
4100 if (count > 50) {
4101 GROW;
4102 count = 0;
4103 }
4104 NEXTL(l);
4105 cur = CUR_CHAR(l);
4106 }
4107 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004108 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004109 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004110 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004111 */
4112 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004113 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004114 if (ctxt->sax->ignorableWhitespace != NULL)
4115 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4116 } else {
4117 if (ctxt->sax->characters != NULL)
4118 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004119 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4120 (*ctxt->space == -1))
4121 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004122 }
4123 }
4124 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004125 if ((cur != 0) && (!IS_CHAR(cur))) {
4126 /* Generate the error and skip the offending character */
4127 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4128 "PCDATA invalid Char value %d\n",
4129 cur);
4130 NEXTL(l);
4131 }
Owen Taylor3473f882001-02-23 17:55:21 +00004132}
4133
4134/**
4135 * xmlParseExternalID:
4136 * @ctxt: an XML parser context
4137 * @publicID: a xmlChar** receiving PubidLiteral
4138 * @strict: indicate whether we should restrict parsing to only
4139 * production [75], see NOTE below
4140 *
4141 * Parse an External ID or a Public ID
4142 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004143 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004144 * 'PUBLIC' S PubidLiteral S SystemLiteral
4145 *
4146 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4147 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4148 *
4149 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4150 *
4151 * Returns the function returns SystemLiteral and in the second
4152 * case publicID receives PubidLiteral, is strict is off
4153 * it is possible to return NULL and have publicID set.
4154 */
4155
4156xmlChar *
4157xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4158 xmlChar *URI = NULL;
4159
4160 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004161
4162 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004163 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004164 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004165 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004166 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4167 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004168 }
4169 SKIP_BLANKS;
4170 URI = xmlParseSystemLiteral(ctxt);
4171 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004172 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004173 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004174 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004175 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004176 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004177 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004178 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004179 }
4180 SKIP_BLANKS;
4181 *publicID = xmlParsePubidLiteral(ctxt);
4182 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004183 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 }
4185 if (strict) {
4186 /*
4187 * We don't handle [83] so "S SystemLiteral" is required.
4188 */
William M. Brack76e95df2003-10-18 16:20:14 +00004189 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004190 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004191 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 } else {
4194 /*
4195 * We handle [83] so we return immediately, if
4196 * "S SystemLiteral" is not detected. From a purely parsing
4197 * point of view that's a nice mess.
4198 */
4199 const xmlChar *ptr;
4200 GROW;
4201
4202 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004203 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004204
William M. Brack76e95df2003-10-18 16:20:14 +00004205 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004206 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4207 }
4208 SKIP_BLANKS;
4209 URI = xmlParseSystemLiteral(ctxt);
4210 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 }
4213 }
4214 return(URI);
4215}
4216
4217/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004218 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004219 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004220 * @buf: the already parsed part of the buffer
4221 * @len: number of bytes filles in the buffer
4222 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004223 *
4224 * Skip an XML (SGML) comment <!-- .... -->
4225 * The spec says that "For compatibility, the string "--" (double-hyphen)
4226 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004227 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004228 *
4229 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4230 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004231static void
4232xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004233 int q, ql;
4234 int r, rl;
4235 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004236 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004237 int inputid;
4238
4239 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004240
Owen Taylor3473f882001-02-23 17:55:21 +00004241 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004242 len = 0;
4243 size = XML_PARSER_BUFFER_SIZE;
4244 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4245 if (buf == NULL) {
4246 xmlErrMemory(ctxt, NULL);
4247 return;
4248 }
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004250 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004251 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004252 if (q == 0)
4253 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004254 if (!IS_CHAR(q)) {
4255 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4256 "xmlParseComment: invalid xmlChar value %d\n",
4257 q);
4258 xmlFree (buf);
4259 return;
4260 }
Owen Taylor3473f882001-02-23 17:55:21 +00004261 NEXTL(ql);
4262 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004263 if (r == 0)
4264 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004265 if (!IS_CHAR(r)) {
4266 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4267 "xmlParseComment: invalid xmlChar value %d\n",
4268 q);
4269 xmlFree (buf);
4270 return;
4271 }
Owen Taylor3473f882001-02-23 17:55:21 +00004272 NEXTL(rl);
4273 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004274 if (cur == 0)
4275 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004276 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004277 ((cur != '>') ||
4278 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004279 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004280 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004281 }
4282 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004283 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004284 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004285 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4286 if (new_buf == NULL) {
4287 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004288 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 return;
4290 }
William M. Bracka3215c72004-07-31 16:24:01 +00004291 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004292 }
4293 COPY_BUF(ql,buf,len,q);
4294 q = r;
4295 ql = rl;
4296 r = cur;
4297 rl = l;
4298
4299 count++;
4300 if (count > 50) {
4301 GROW;
4302 count = 0;
4303 }
4304 NEXTL(l);
4305 cur = CUR_CHAR(l);
4306 if (cur == 0) {
4307 SHRINK;
4308 GROW;
4309 cur = CUR_CHAR(l);
4310 }
4311 }
4312 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004313 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004314 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004315 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004316 } else if (!IS_CHAR(cur)) {
4317 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4318 "xmlParseComment: invalid xmlChar value %d\n",
4319 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004320 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004321 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004322 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4323 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004324 }
4325 NEXT;
4326 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4327 (!ctxt->disableSAX))
4328 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004329 }
Daniel Veillardda629342007-08-01 07:49:06 +00004330 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004331 return;
4332not_terminated:
4333 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4334 "Comment not terminated\n", NULL);
4335 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004336 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004337}
Daniel Veillardda629342007-08-01 07:49:06 +00004338
Daniel Veillard4c778d82005-01-23 17:37:44 +00004339/**
4340 * xmlParseComment:
4341 * @ctxt: an XML parser context
4342 *
4343 * Skip an XML (SGML) comment <!-- .... -->
4344 * The spec says that "For compatibility, the string "--" (double-hyphen)
4345 * must not occur within comments. "
4346 *
4347 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4348 */
4349void
4350xmlParseComment(xmlParserCtxtPtr ctxt) {
4351 xmlChar *buf = NULL;
4352 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004353 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004354 xmlParserInputState state;
4355 const xmlChar *in;
4356 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004357 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004358
4359 /*
4360 * Check that there is a comment right here.
4361 */
4362 if ((RAW != '<') || (NXT(1) != '!') ||
4363 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004364 state = ctxt->instate;
4365 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004366 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004367 SKIP(4);
4368 SHRINK;
4369 GROW;
4370
4371 /*
4372 * Accelerated common case where input don't need to be
4373 * modified before passing it to the handler.
4374 */
4375 in = ctxt->input->cur;
4376 do {
4377 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004378 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004379 ctxt->input->line++; ctxt->input->col = 1;
4380 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004381 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004382 }
4383get_more:
4384 ccol = ctxt->input->col;
4385 while (((*in > '-') && (*in <= 0x7F)) ||
4386 ((*in >= 0x20) && (*in < '-')) ||
4387 (*in == 0x09)) {
4388 in++;
4389 ccol++;
4390 }
4391 ctxt->input->col = ccol;
4392 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004393 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004394 ctxt->input->line++; ctxt->input->col = 1;
4395 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004396 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004397 goto get_more;
4398 }
4399 nbchar = in - ctxt->input->cur;
4400 /*
4401 * save current set of data
4402 */
4403 if (nbchar > 0) {
4404 if ((ctxt->sax != NULL) &&
4405 (ctxt->sax->comment != NULL)) {
4406 if (buf == NULL) {
4407 if ((*in == '-') && (in[1] == '-'))
4408 size = nbchar + 1;
4409 else
4410 size = XML_PARSER_BUFFER_SIZE + nbchar;
4411 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4412 if (buf == NULL) {
4413 xmlErrMemory(ctxt, NULL);
4414 ctxt->instate = state;
4415 return;
4416 }
4417 len = 0;
4418 } else if (len + nbchar + 1 >= size) {
4419 xmlChar *new_buf;
4420 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4421 new_buf = (xmlChar *) xmlRealloc(buf,
4422 size * sizeof(xmlChar));
4423 if (new_buf == NULL) {
4424 xmlFree (buf);
4425 xmlErrMemory(ctxt, NULL);
4426 ctxt->instate = state;
4427 return;
4428 }
4429 buf = new_buf;
4430 }
4431 memcpy(&buf[len], ctxt->input->cur, nbchar);
4432 len += nbchar;
4433 buf[len] = 0;
4434 }
4435 }
4436 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004437 if (*in == 0xA) {
4438 in++;
4439 ctxt->input->line++; ctxt->input->col = 1;
4440 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004441 if (*in == 0xD) {
4442 in++;
4443 if (*in == 0xA) {
4444 ctxt->input->cur = in;
4445 in++;
4446 ctxt->input->line++; ctxt->input->col = 1;
4447 continue; /* while */
4448 }
4449 in--;
4450 }
4451 SHRINK;
4452 GROW;
4453 in = ctxt->input->cur;
4454 if (*in == '-') {
4455 if (in[1] == '-') {
4456 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004457 if (ctxt->input->id != inputid) {
4458 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4459 "comment doesn't start and stop in the same entity\n");
4460 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004461 SKIP(3);
4462 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4463 (!ctxt->disableSAX)) {
4464 if (buf != NULL)
4465 ctxt->sax->comment(ctxt->userData, buf);
4466 else
4467 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4468 }
4469 if (buf != NULL)
4470 xmlFree(buf);
4471 ctxt->instate = state;
4472 return;
4473 }
4474 if (buf != NULL)
4475 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4476 "Comment not terminated \n<!--%.50s\n",
4477 buf);
4478 else
4479 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4480 "Comment not terminated \n", NULL);
4481 in++;
4482 ctxt->input->col++;
4483 }
4484 in++;
4485 ctxt->input->col++;
4486 goto get_more;
4487 }
4488 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4489 xmlParseCommentComplex(ctxt, buf, len, size);
4490 ctxt->instate = state;
4491 return;
4492}
4493
Owen Taylor3473f882001-02-23 17:55:21 +00004494
4495/**
4496 * xmlParsePITarget:
4497 * @ctxt: an XML parser context
4498 *
4499 * parse the name of a PI
4500 *
4501 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4502 *
4503 * Returns the PITarget name or NULL
4504 */
4505
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004506const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004507xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004508 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004509
4510 name = xmlParseName(ctxt);
4511 if ((name != NULL) &&
4512 ((name[0] == 'x') || (name[0] == 'X')) &&
4513 ((name[1] == 'm') || (name[1] == 'M')) &&
4514 ((name[2] == 'l') || (name[2] == 'L'))) {
4515 int i;
4516 if ((name[0] == 'x') && (name[1] == 'm') &&
4517 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004518 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004519 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004520 return(name);
4521 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004522 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004523 return(name);
4524 }
4525 for (i = 0;;i++) {
4526 if (xmlW3CPIs[i] == NULL) break;
4527 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4528 return(name);
4529 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004530 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4531 "xmlParsePITarget: invalid name prefix 'xml'\n",
4532 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004533 }
Daniel Veillard37334572008-07-31 08:20:02 +00004534 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4535 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4536 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4537 }
Owen Taylor3473f882001-02-23 17:55:21 +00004538 return(name);
4539}
4540
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A malformed value produces an XML_WAR_CATALOG_PI warning, except when
 * the '=' after "catalog" is missing: that case returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* "catalog" keyword, optionally preceded by blanks */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign, blanks allowed on both sides */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;

    /* quoted value, closed by the same quote character that opened it */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4602
Owen Taylor3473f882001-02-23 17:55:21 +00004603/**
4604 * xmlParsePI:
4605 * @ctxt: an XML parser context
4606 *
4607 * parse an XML Processing Instruction.
4608 *
4609 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4610 *
4611 * The processing is transfered to SAX once parsed.
4612 */
4613
4614void
4615xmlParsePI(xmlParserCtxtPtr ctxt) {
4616 xmlChar *buf = NULL;
4617 int len = 0;
4618 int size = XML_PARSER_BUFFER_SIZE;
4619 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004620 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004621 xmlParserInputState state;
4622 int count = 0;
4623
4624 if ((RAW == '<') && (NXT(1) == '?')) {
4625 xmlParserInputPtr input = ctxt->input;
4626 state = ctxt->instate;
4627 ctxt->instate = XML_PARSER_PI;
4628 /*
4629 * this is a Processing Instruction.
4630 */
4631 SKIP(2);
4632 SHRINK;
4633
4634 /*
4635 * Parse the target name and check for special support like
4636 * namespace.
4637 */
4638 target = xmlParsePITarget(ctxt);
4639 if (target != NULL) {
4640 if ((RAW == '?') && (NXT(1) == '>')) {
4641 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004642 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4643 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004644 }
4645 SKIP(2);
4646
4647 /*
4648 * SAX: PI detected.
4649 */
4650 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4651 (ctxt->sax->processingInstruction != NULL))
4652 ctxt->sax->processingInstruction(ctxt->userData,
4653 target, NULL);
4654 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004655 return;
4656 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004657 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004658 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004659 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004660 ctxt->instate = state;
4661 return;
4662 }
4663 cur = CUR;
4664 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004665 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4666 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004667 }
4668 SKIP_BLANKS;
4669 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004670 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004671 ((cur != '?') || (NXT(1) != '>'))) {
4672 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004673 xmlChar *tmp;
4674
Owen Taylor3473f882001-02-23 17:55:21 +00004675 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004676 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4677 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004678 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004679 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004680 ctxt->instate = state;
4681 return;
4682 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004683 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
4685 count++;
4686 if (count > 50) {
4687 GROW;
4688 count = 0;
4689 }
4690 COPY_BUF(l,buf,len,cur);
4691 NEXTL(l);
4692 cur = CUR_CHAR(l);
4693 if (cur == 0) {
4694 SHRINK;
4695 GROW;
4696 cur = CUR_CHAR(l);
4697 }
4698 }
4699 buf[len] = 0;
4700 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004701 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4702 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004703 } else {
4704 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004705 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4706 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004707 }
4708 SKIP(2);
4709
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004710#ifdef LIBXML_CATALOG_ENABLED
4711 if (((state == XML_PARSER_MISC) ||
4712 (state == XML_PARSER_START)) &&
4713 (xmlStrEqual(target, XML_CATALOG_PI))) {
4714 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4715 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4716 (allow == XML_CATA_ALLOW_ALL))
4717 xmlParseCatalogPI(ctxt, buf);
4718 }
4719#endif
4720
4721
Owen Taylor3473f882001-02-23 17:55:21 +00004722 /*
4723 * SAX: PI detected.
4724 */
4725 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4726 (ctxt->sax->processingInstruction != NULL))
4727 ctxt->sax->processingInstruction(ctxt->userData,
4728 target, buf);
4729 }
4730 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004731 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004732 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004733 }
4734 ctxt->instate = state;
4735 }
4736}
4737
4738/**
4739 * xmlParseNotationDecl:
4740 * @ctxt: an XML parser context
4741 *
4742 * parse a notation declaration
4743 *
4744 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4745 *
4746 * Hence there is actually 3 choices:
4747 * 'PUBLIC' S PubidLiteral
4748 * 'PUBLIC' S PubidLiteral S SystemLiteral
4749 * and 'SYSTEM' S SystemLiteral
4750 *
4751 * See the NOTE on xmlParseExternalID().
4752 */
4753
4754void
4755xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004756 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004757 xmlChar *Pubid;
4758 xmlChar *Systemid;
4759
Daniel Veillarda07050d2003-10-19 14:46:32 +00004760 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004761 xmlParserInputPtr input = ctxt->input;
4762 SHRINK;
4763 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004764 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4766 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004767 return;
4768 }
4769 SKIP_BLANKS;
4770
Daniel Veillard76d66f42001-05-16 21:05:17 +00004771 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004772 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004773 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004774 return;
4775 }
William M. Brack76e95df2003-10-18 16:20:14 +00004776 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004777 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004778 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004779 return;
4780 }
Daniel Veillard37334572008-07-31 08:20:02 +00004781 if (xmlStrchr(name, ':') != NULL) {
4782 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4783 "colon are forbidden from notation names '%s'\n",
4784 name, NULL, NULL);
4785 }
Owen Taylor3473f882001-02-23 17:55:21 +00004786 SKIP_BLANKS;
4787
4788 /*
4789 * Parse the IDs.
4790 */
4791 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4792 SKIP_BLANKS;
4793
4794 if (RAW == '>') {
4795 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004796 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4797 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004798 }
4799 NEXT;
4800 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4801 (ctxt->sax->notationDecl != NULL))
4802 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4803 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004804 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004805 }
Owen Taylor3473f882001-02-23 17:55:21 +00004806 if (Systemid != NULL) xmlFree(Systemid);
4807 if (Pubid != NULL) xmlFree(Pubid);
4808 }
4809}
4810
4811/**
4812 * xmlParseEntityDecl:
4813 * @ctxt: an XML parser context
4814 *
4815 * parse <!ENTITY declarations
4816 *
4817 * [70] EntityDecl ::= GEDecl | PEDecl
4818 *
4819 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4820 *
4821 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4822 *
4823 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4824 *
4825 * [74] PEDef ::= EntityValue | ExternalID
4826 *
4827 * [76] NDataDecl ::= S 'NDATA' S Name
4828 *
4829 * [ VC: Notation Declared ]
4830 * The Name must match the declared name of a notation.
4831 */
4832
4833void
4834xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004835 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004836 xmlChar *value = NULL;
4837 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004838 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004839 int isParameter = 0;
4840 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004841 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004842
Daniel Veillard4c778d82005-01-23 17:37:44 +00004843 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004844 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004845 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004846 SHRINK;
4847 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004848 skipped = SKIP_BLANKS;
4849 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004850 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4851 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004852 }
Owen Taylor3473f882001-02-23 17:55:21 +00004853
4854 if (RAW == '%') {
4855 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004856 skipped = SKIP_BLANKS;
4857 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004858 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4859 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004860 }
Owen Taylor3473f882001-02-23 17:55:21 +00004861 isParameter = 1;
4862 }
4863
Daniel Veillard76d66f42001-05-16 21:05:17 +00004864 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004865 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004866 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4867 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004868 return;
4869 }
Daniel Veillard37334572008-07-31 08:20:02 +00004870 if (xmlStrchr(name, ':') != NULL) {
4871 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4872 "colon are forbidden from entities names '%s'\n",
4873 name, NULL, NULL);
4874 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004875 skipped = SKIP_BLANKS;
4876 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004877 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4878 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004879 }
Owen Taylor3473f882001-02-23 17:55:21 +00004880
Daniel Veillardf5582f12002-06-11 10:08:16 +00004881 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004882 /*
4883 * handle the various case of definitions...
4884 */
4885 if (isParameter) {
4886 if ((RAW == '"') || (RAW == '\'')) {
4887 value = xmlParseEntityValue(ctxt, &orig);
4888 if (value) {
4889 if ((ctxt->sax != NULL) &&
4890 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4891 ctxt->sax->entityDecl(ctxt->userData, name,
4892 XML_INTERNAL_PARAMETER_ENTITY,
4893 NULL, NULL, value);
4894 }
4895 } else {
4896 URI = xmlParseExternalID(ctxt, &literal, 1);
4897 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004898 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004899 }
4900 if (URI) {
4901 xmlURIPtr uri;
4902
4903 uri = xmlParseURI((const char *) URI);
4904 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004905 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4906 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004907 /*
4908 * This really ought to be a well formedness error
4909 * but the XML Core WG decided otherwise c.f. issue
4910 * E26 of the XML erratas.
4911 */
Owen Taylor3473f882001-02-23 17:55:21 +00004912 } else {
4913 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004914 /*
4915 * Okay this is foolish to block those but not
4916 * invalid URIs.
4917 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004918 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 } else {
4920 if ((ctxt->sax != NULL) &&
4921 (!ctxt->disableSAX) &&
4922 (ctxt->sax->entityDecl != NULL))
4923 ctxt->sax->entityDecl(ctxt->userData, name,
4924 XML_EXTERNAL_PARAMETER_ENTITY,
4925 literal, URI, NULL);
4926 }
4927 xmlFreeURI(uri);
4928 }
4929 }
4930 }
4931 } else {
4932 if ((RAW == '"') || (RAW == '\'')) {
4933 value = xmlParseEntityValue(ctxt, &orig);
4934 if ((ctxt->sax != NULL) &&
4935 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4936 ctxt->sax->entityDecl(ctxt->userData, name,
4937 XML_INTERNAL_GENERAL_ENTITY,
4938 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004939 /*
4940 * For expat compatibility in SAX mode.
4941 */
4942 if ((ctxt->myDoc == NULL) ||
4943 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4944 if (ctxt->myDoc == NULL) {
4945 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004946 if (ctxt->myDoc == NULL) {
4947 xmlErrMemory(ctxt, "New Doc failed");
4948 return;
4949 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00004950 }
4951 if (ctxt->myDoc->intSubset == NULL)
4952 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4953 BAD_CAST "fake", NULL, NULL);
4954
Daniel Veillard1af9a412003-08-20 22:54:39 +00004955 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4956 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004957 }
Owen Taylor3473f882001-02-23 17:55:21 +00004958 } else {
4959 URI = xmlParseExternalID(ctxt, &literal, 1);
4960 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004961 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004962 }
4963 if (URI) {
4964 xmlURIPtr uri;
4965
4966 uri = xmlParseURI((const char *)URI);
4967 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004968 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4969 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004970 /*
4971 * This really ought to be a well formedness error
4972 * but the XML Core WG decided otherwise c.f. issue
4973 * E26 of the XML erratas.
4974 */
Owen Taylor3473f882001-02-23 17:55:21 +00004975 } else {
4976 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004977 /*
4978 * Okay this is foolish to block those but not
4979 * invalid URIs.
4980 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004981 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004982 }
4983 xmlFreeURI(uri);
4984 }
4985 }
William M. Brack76e95df2003-10-18 16:20:14 +00004986 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004987 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4988 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004989 }
4990 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004991 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004992 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004993 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4995 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004996 }
4997 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004998 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004999 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5000 (ctxt->sax->unparsedEntityDecl != NULL))
5001 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5002 literal, URI, ndata);
5003 } else {
5004 if ((ctxt->sax != NULL) &&
5005 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5006 ctxt->sax->entityDecl(ctxt->userData, name,
5007 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5008 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005009 /*
5010 * For expat compatibility in SAX mode.
5011 * assuming the entity repalcement was asked for
5012 */
5013 if ((ctxt->replaceEntities != 0) &&
5014 ((ctxt->myDoc == NULL) ||
5015 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5016 if (ctxt->myDoc == NULL) {
5017 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005018 if (ctxt->myDoc == NULL) {
5019 xmlErrMemory(ctxt, "New Doc failed");
5020 return;
5021 }
Daniel Veillard5997aca2002-03-18 18:36:20 +00005022 }
5023
5024 if (ctxt->myDoc->intSubset == NULL)
5025 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5026 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005027 xmlSAX2EntityDecl(ctxt, name,
5028 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5029 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005030 }
Owen Taylor3473f882001-02-23 17:55:21 +00005031 }
5032 }
5033 }
5034 SKIP_BLANKS;
5035 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005036 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005037 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005038 } else {
5039 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005040 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5041 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005042 }
5043 NEXT;
5044 }
5045 if (orig != NULL) {
5046 /*
5047 * Ugly mechanism to save the raw entity value.
5048 */
5049 xmlEntityPtr cur = NULL;
5050
5051 if (isParameter) {
5052 if ((ctxt->sax != NULL) &&
5053 (ctxt->sax->getParameterEntity != NULL))
5054 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5055 } else {
5056 if ((ctxt->sax != NULL) &&
5057 (ctxt->sax->getEntity != NULL))
5058 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005059 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005060 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005061 }
Owen Taylor3473f882001-02-23 17:55:21 +00005062 }
5063 if (cur != NULL) {
5064 if (cur->orig != NULL)
5065 xmlFree(orig);
5066 else
5067 cur->orig = orig;
5068 } else
5069 xmlFree(orig);
5070 }
Owen Taylor3473f882001-02-23 17:55:21 +00005071 if (value != NULL) xmlFree(value);
5072 if (URI != NULL) xmlFree(URI);
5073 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005074 }
5075}
5076
5077/**
5078 * xmlParseDefaultDecl:
5079 * @ctxt: an XML parser context
5080 * @value: Receive a possible fixed default value for the attribute
5081 *
5082 * Parse an attribute default declaration
5083 *
5084 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5085 *
5086 * [ VC: Required Attribute ]
5087 * if the default declaration is the keyword #REQUIRED, then the
5088 * attribute must be specified for all elements of the type in the
5089 * attribute-list declaration.
5090 *
5091 * [ VC: Attribute Default Legal ]
5092 * The declared default value must meet the lexical constraints of
5093 * the declared attribute type c.f. xmlValidateAttributeDecl()
5094 *
5095 * [ VC: Fixed Attribute Default ]
5096 * if an attribute has a default value declared with the #FIXED
5097 * keyword, instances of that attribute must match the default value.
5098 *
5099 * [ WFC: No < in Attribute Values ]
5100 * handled in xmlParseAttValue()
5101 *
5102 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5103 * or XML_ATTRIBUTE_FIXED.
5104 */
5105
5106int
5107xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5108 int val;
5109 xmlChar *ret;
5110
5111 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005112 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005113 SKIP(9);
5114 return(XML_ATTRIBUTE_REQUIRED);
5115 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005116 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005117 SKIP(8);
5118 return(XML_ATTRIBUTE_IMPLIED);
5119 }
5120 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005121 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005122 SKIP(6);
5123 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005124 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005125 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5126 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005127 }
5128 SKIP_BLANKS;
5129 }
5130 ret = xmlParseAttValue(ctxt);
5131 ctxt->instate = XML_PARSER_DTD;
5132 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005133 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005134 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005135 } else
5136 *value = ret;
5137 return(val);
5138}
5139
5140/**
5141 * xmlParseNotationType:
5142 * @ctxt: an XML parser context
5143 *
5144 * parse an Notation attribute type.
5145 *
5146 * Note: the leading 'NOTATION' S part has already being parsed...
5147 *
5148 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5149 *
5150 * [ VC: Notation Attributes ]
5151 * Values of this type must match one of the notation names included
5152 * in the declaration; all notation names in the declaration must be declared.
5153 *
5154 * Returns: the notation attribute tree built while parsing
5155 */
5156
5157xmlEnumerationPtr
5158xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005159 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005160 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5161
5162 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005163 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005164 return(NULL);
5165 }
5166 SHRINK;
5167 do {
5168 NEXT;
5169 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005170 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005171 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5173 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005174 return(ret);
5175 }
5176 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00005177 if (cur == NULL) return(ret);
5178 if (last == NULL) ret = last = cur;
5179 else {
5180 last->next = cur;
5181 last = cur;
5182 }
5183 SKIP_BLANKS;
5184 } while (RAW == '|');
5185 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005186 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005187 if ((last != NULL) && (last != ret))
5188 xmlFreeEnumeration(last);
5189 return(ret);
5190 }
5191 NEXT;
5192 return(ret);
5193}
5194
5195/**
5196 * xmlParseEnumerationType:
5197 * @ctxt: an XML parser context
5198 *
5199 * parse an Enumeration attribute type.
5200 *
5201 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5202 *
5203 * [ VC: Enumeration ]
5204 * Values of this type must match one of the Nmtoken tokens in
5205 * the declaration
5206 *
5207 * Returns: the enumeration attribute tree built while parsing
5208 */
5209
5210xmlEnumerationPtr
5211xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5212 xmlChar *name;
5213 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5214
5215 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005216 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 return(NULL);
5218 }
5219 SHRINK;
5220 do {
5221 NEXT;
5222 SKIP_BLANKS;
5223 name = xmlParseNmtoken(ctxt);
5224 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005225 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 return(ret);
5227 }
5228 cur = xmlCreateEnumeration(name);
5229 xmlFree(name);
5230 if (cur == NULL) return(ret);
5231 if (last == NULL) ret = last = cur;
5232 else {
5233 last->next = cur;
5234 last = cur;
5235 }
5236 SKIP_BLANKS;
5237 } while (RAW == '|');
5238 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005239 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 return(ret);
5241 }
5242 NEXT;
5243 return(ret);
5244}
5245
5246/**
5247 * xmlParseEnumeratedType:
5248 * @ctxt: an XML parser context
5249 * @tree: the enumeration tree built while parsing
5250 *
5251 * parse an Enumerated attribute type.
5252 *
5253 * [57] EnumeratedType ::= NotationType | Enumeration
5254 *
5255 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5256 *
5257 *
5258 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5259 */
5260
5261int
5262xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005263 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005264 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005265 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5267 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005268 return(0);
5269 }
5270 SKIP_BLANKS;
5271 *tree = xmlParseNotationType(ctxt);
5272 if (*tree == NULL) return(0);
5273 return(XML_ATTRIBUTE_NOTATION);
5274 }
5275 *tree = xmlParseEnumerationType(ctxt);
5276 if (*tree == NULL) return(0);
5277 return(XML_ATTRIBUTE_ENUMERATION);
5278}
5279
5280/**
5281 * xmlParseAttributeType:
5282 * @ctxt: an XML parser context
5283 * @tree: the enumeration tree built while parsing
5284 *
5285 * parse the Attribute list def for an element
5286 *
5287 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5288 *
5289 * [55] StringType ::= 'CDATA'
5290 *
5291 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5292 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5293 *
5294 * Validity constraints for attribute values syntax are checked in
5295 * xmlValidateAttributeValue()
5296 *
5297 * [ VC: ID ]
5298 * Values of type ID must match the Name production. A name must not
5299 * appear more than once in an XML document as a value of this type;
5300 * i.e., ID values must uniquely identify the elements which bear them.
5301 *
5302 * [ VC: One ID per Element Type ]
5303 * No element type may have more than one ID attribute specified.
5304 *
5305 * [ VC: ID Attribute Default ]
5306 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5307 *
5308 * [ VC: IDREF ]
5309 * Values of type IDREF must match the Name production, and values
5310 * of type IDREFS must match Names; each IDREF Name must match the value
5311 * of an ID attribute on some element in the XML document; i.e. IDREF
5312 * values must match the value of some ID attribute.
5313 *
5314 * [ VC: Entity Name ]
5315 * Values of type ENTITY must match the Name production, values
5316 * of type ENTITIES must match Names; each Entity Name must match the
5317 * name of an unparsed entity declared in the DTD.
5318 *
5319 * [ VC: Name Token ]
5320 * Values of type NMTOKEN must match the Nmtoken production; values
5321 * of type NMTOKENS must match Nmtokens.
5322 *
5323 * Returns the attribute type
5324 */
5325int
5326xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5327 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005328 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005329 SKIP(5);
5330 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005331 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005332 SKIP(6);
5333 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005334 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005335 SKIP(5);
5336 return(XML_ATTRIBUTE_IDREF);
5337 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5338 SKIP(2);
5339 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005340 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005341 SKIP(6);
5342 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005343 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005344 SKIP(8);
5345 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005346 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005347 SKIP(8);
5348 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005349 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005350 SKIP(7);
5351 return(XML_ATTRIBUTE_NMTOKEN);
5352 }
5353 return(xmlParseEnumeratedType(ctxt, tree));
5354}
5355
5356/**
5357 * xmlParseAttributeListDecl:
5358 * @ctxt: an XML parser context
5359 *
5360 * : parse the Attribute list def for an element
5361 *
5362 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5363 *
5364 * [53] AttDef ::= S Name S AttType S DefaultDecl
5365 *
5366 */
5367void
5368xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005369 const xmlChar *elemName;
5370 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005371 xmlEnumerationPtr tree;
5372
Daniel Veillarda07050d2003-10-19 14:46:32 +00005373 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005374 xmlParserInputPtr input = ctxt->input;
5375
5376 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005377 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005378 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005379 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005380 }
5381 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005382 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005383 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005384 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5385 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005386 return;
5387 }
5388 SKIP_BLANKS;
5389 GROW;
5390 while (RAW != '>') {
5391 const xmlChar *check = CUR_PTR;
5392 int type;
5393 int def;
5394 xmlChar *defaultValue = NULL;
5395
5396 GROW;
5397 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005398 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005399 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005400 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5401 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005402 break;
5403 }
5404 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005405 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005406 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005407 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005408 break;
5409 }
5410 SKIP_BLANKS;
5411
5412 type = xmlParseAttributeType(ctxt, &tree);
5413 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005414 break;
5415 }
5416
5417 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005418 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5420 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005421 if (tree != NULL)
5422 xmlFreeEnumeration(tree);
5423 break;
5424 }
5425 SKIP_BLANKS;
5426
5427 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5428 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005429 if (defaultValue != NULL)
5430 xmlFree(defaultValue);
5431 if (tree != NULL)
5432 xmlFreeEnumeration(tree);
5433 break;
5434 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005435 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5436 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005437
5438 GROW;
5439 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005440 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005442 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005443 if (defaultValue != NULL)
5444 xmlFree(defaultValue);
5445 if (tree != NULL)
5446 xmlFreeEnumeration(tree);
5447 break;
5448 }
5449 SKIP_BLANKS;
5450 }
5451 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005452 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5453 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005454 if (defaultValue != NULL)
5455 xmlFree(defaultValue);
5456 if (tree != NULL)
5457 xmlFreeEnumeration(tree);
5458 break;
5459 }
5460 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5461 (ctxt->sax->attributeDecl != NULL))
5462 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5463 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005464 else if (tree != NULL)
5465 xmlFreeEnumeration(tree);
5466
5467 if ((ctxt->sax2) && (defaultValue != NULL) &&
5468 (def != XML_ATTRIBUTE_IMPLIED) &&
5469 (def != XML_ATTRIBUTE_REQUIRED)) {
5470 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5471 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005472 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005473 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5474 }
Owen Taylor3473f882001-02-23 17:55:21 +00005475 if (defaultValue != NULL)
5476 xmlFree(defaultValue);
5477 GROW;
5478 }
5479 if (RAW == '>') {
5480 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005481 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5482 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005483 }
5484 NEXT;
5485 }
Owen Taylor3473f882001-02-23 17:55:21 +00005486 }
5487}
5488
5489/**
5490 * xmlParseElementMixedContentDecl:
5491 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005492 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005493 *
5494 * parse the declaration for a Mixed Element content
5495 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5496 *
5497 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5498 * '(' S? '#PCDATA' S? ')'
5499 *
5500 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5501 *
5502 * [ VC: No Duplicate Types ]
5503 * The same name must not appear more than once in a single
5504 * mixed-content declaration.
5505 *
5506 * returns: the list of the xmlElementContentPtr describing the element choices
5507 */
5508xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005509xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005510 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005511 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005512
5513 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005514 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005515 SKIP(7);
5516 SKIP_BLANKS;
5517 SHRINK;
5518 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005519 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005520 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5521"Element content declaration doesn't start and stop in the same entity\n",
5522 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005523 }
Owen Taylor3473f882001-02-23 17:55:21 +00005524 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005525 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005526 if (ret == NULL)
5527 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005528 if (RAW == '*') {
5529 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5530 NEXT;
5531 }
5532 return(ret);
5533 }
5534 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005535 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005536 if (ret == NULL) return(NULL);
5537 }
5538 while (RAW == '|') {
5539 NEXT;
5540 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005541 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 if (ret == NULL) return(NULL);
5543 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005544 if (cur != NULL)
5545 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005546 cur = ret;
5547 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005548 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005549 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005550 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005551 if (n->c1 != NULL)
5552 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005553 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005554 if (n != NULL)
5555 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005556 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005557 }
5558 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005559 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005560 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005561 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005562 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005563 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 return(NULL);
5565 }
5566 SKIP_BLANKS;
5567 GROW;
5568 }
5569 if ((RAW == ')') && (NXT(1) == '*')) {
5570 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005571 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005572 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005573 if (cur->c2 != NULL)
5574 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005575 }
5576 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005577 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005578 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5579"Element content declaration doesn't start and stop in the same entity\n",
5580 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005581 }
Owen Taylor3473f882001-02-23 17:55:21 +00005582 SKIP(2);
5583 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005584 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005585 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005586 return(NULL);
5587 }
5588
5589 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005590 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005591 }
5592 return(ret);
5593}
5594
5595/**
5596 * xmlParseElementChildrenContentDecl:
5597 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005598 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005599 *
5600 * parse the declaration for a Mixed Element content
5601 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5602 *
5603 *
5604 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5605 *
5606 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5607 *
5608 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5609 *
5610 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5611 *
5612 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5613 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005614 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005615 * opening or closing parentheses in a choice, seq, or Mixed
5616 * construct is contained in the replacement text for a parameter
5617 * entity, both must be contained in the same replacement text. For
5618 * interoperability, if a parameter-entity reference appears in a
5619 * choice, seq, or Mixed construct, its replacement text should not
5620 * be empty, and neither the first nor last non-blank character of
5621 * the replacement text should be a connector (| or ,).
5622 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005623 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005624 * hierarchy.
5625 */
5626xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005627xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005628 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005629 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005630 xmlChar type = 0;
5631
5632 SKIP_BLANKS;
5633 GROW;
5634 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005635 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005636
Owen Taylor3473f882001-02-23 17:55:21 +00005637 /* Recurse on first child */
5638 NEXT;
5639 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005640 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005641 SKIP_BLANKS;
5642 GROW;
5643 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005644 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005645 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005646 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005647 return(NULL);
5648 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005649 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005650 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005651 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005652 return(NULL);
5653 }
Owen Taylor3473f882001-02-23 17:55:21 +00005654 GROW;
5655 if (RAW == '?') {
5656 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5657 NEXT;
5658 } else if (RAW == '*') {
5659 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5660 NEXT;
5661 } else if (RAW == '+') {
5662 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5663 NEXT;
5664 } else {
5665 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5666 }
Owen Taylor3473f882001-02-23 17:55:21 +00005667 GROW;
5668 }
5669 SKIP_BLANKS;
5670 SHRINK;
5671 while (RAW != ')') {
5672 /*
5673 * Each loop we parse one separator and one element.
5674 */
5675 if (RAW == ',') {
5676 if (type == 0) type = CUR;
5677
5678 /*
5679 * Detect "Name | Name , Name" error
5680 */
5681 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005682 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005683 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005684 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005685 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005686 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005687 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005688 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005689 return(NULL);
5690 }
5691 NEXT;
5692
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005693 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005694 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005695 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005696 xmlFreeDocElementContent(ctxt->myDoc, last);
5697 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005698 return(NULL);
5699 }
5700 if (last == NULL) {
5701 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005702 if (ret != NULL)
5703 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005704 ret = cur = op;
5705 } else {
5706 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005707 if (op != NULL)
5708 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005709 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005710 if (last != NULL)
5711 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005712 cur =op;
5713 last = NULL;
5714 }
5715 } else if (RAW == '|') {
5716 if (type == 0) type = CUR;
5717
5718 /*
5719 * Detect "Name , Name | Name" error
5720 */
5721 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005722 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005723 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005724 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005725 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005726 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005728 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 return(NULL);
5730 }
5731 NEXT;
5732
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005733 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005734 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005735 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005736 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005738 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 return(NULL);
5740 }
5741 if (last == NULL) {
5742 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005743 if (ret != NULL)
5744 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005745 ret = cur = op;
5746 } else {
5747 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005748 if (op != NULL)
5749 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005750 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005751 if (last != NULL)
5752 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005753 cur =op;
5754 last = NULL;
5755 }
5756 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005757 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005758 if ((last != NULL) && (last != ret))
5759 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005761 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005762 return(NULL);
5763 }
5764 GROW;
5765 SKIP_BLANKS;
5766 GROW;
5767 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005768 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005769 /* Recurse on second child */
5770 NEXT;
5771 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005772 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005773 SKIP_BLANKS;
5774 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005775 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005776 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005777 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005778 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005779 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005780 return(NULL);
5781 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005782 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005783 if (last == NULL) {
5784 if (ret != NULL)
5785 xmlFreeDocElementContent(ctxt->myDoc, ret);
5786 return(NULL);
5787 }
Owen Taylor3473f882001-02-23 17:55:21 +00005788 if (RAW == '?') {
5789 last->ocur = XML_ELEMENT_CONTENT_OPT;
5790 NEXT;
5791 } else if (RAW == '*') {
5792 last->ocur = XML_ELEMENT_CONTENT_MULT;
5793 NEXT;
5794 } else if (RAW == '+') {
5795 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5796 NEXT;
5797 } else {
5798 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5799 }
5800 }
5801 SKIP_BLANKS;
5802 GROW;
5803 }
5804 if ((cur != NULL) && (last != NULL)) {
5805 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005806 if (last != NULL)
5807 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005808 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005809 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005810 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5811"Element content declaration doesn't start and stop in the same entity\n",
5812 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005813 }
Owen Taylor3473f882001-02-23 17:55:21 +00005814 NEXT;
5815 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005816 if (ret != NULL) {
5817 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5818 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5819 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5820 else
5821 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5822 }
Owen Taylor3473f882001-02-23 17:55:21 +00005823 NEXT;
5824 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005825 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005826 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005827 cur = ret;
5828 /*
5829 * Some normalization:
5830 * (a | b* | c?)* == (a | b | c)*
5831 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005832 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005833 if ((cur->c1 != NULL) &&
5834 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5835 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5836 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5837 if ((cur->c2 != NULL) &&
5838 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5839 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5840 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5841 cur = cur->c2;
5842 }
5843 }
Owen Taylor3473f882001-02-23 17:55:21 +00005844 NEXT;
5845 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005846 if (ret != NULL) {
5847 int found = 0;
5848
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005849 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5850 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5851 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005852 else
5853 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005854 /*
5855 * Some normalization:
5856 * (a | b*)+ == (a | b)*
5857 * (a | b?)+ == (a | b)*
5858 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005859 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005860 if ((cur->c1 != NULL) &&
5861 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5862 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5863 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5864 found = 1;
5865 }
5866 if ((cur->c2 != NULL) &&
5867 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5868 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5869 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5870 found = 1;
5871 }
5872 cur = cur->c2;
5873 }
5874 if (found)
5875 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5876 }
Owen Taylor3473f882001-02-23 17:55:21 +00005877 NEXT;
5878 }
5879 return(ret);
5880}
5881
5882/**
5883 * xmlParseElementContentDecl:
5884 * @ctxt: an XML parser context
5885 * @name: the name of the element being defined.
5886 * @result: the Element Content pointer will be stored here if any
5887 *
5888 * parse the declaration for an Element content either Mixed or Children,
5889 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5890 *
5891 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5892 *
5893 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5894 */
5895
5896int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005897xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005898 xmlElementContentPtr *result) {
5899
5900 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005901 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 int res;
5903
5904 *result = NULL;
5905
5906 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005907 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005908 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005909 return(-1);
5910 }
5911 NEXT;
5912 GROW;
5913 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005914 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005915 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005916 res = XML_ELEMENT_TYPE_MIXED;
5917 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005918 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005919 res = XML_ELEMENT_TYPE_ELEMENT;
5920 }
Owen Taylor3473f882001-02-23 17:55:21 +00005921 SKIP_BLANKS;
5922 *result = tree;
5923 return(res);
5924}
5925
5926/**
5927 * xmlParseElementDecl:
5928 * @ctxt: an XML parser context
5929 *
5930 * parse an Element declaration.
5931 *
5932 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5933 *
5934 * [ VC: Unique Element Type Declaration ]
5935 * No element type may be declared more than once
5936 *
5937 * Returns the type of the element, or -1 in case of error
5938 */
5939int
5940xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005941 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005942 int ret = -1;
5943 xmlElementContentPtr content = NULL;
5944
Daniel Veillard4c778d82005-01-23 17:37:44 +00005945 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005946 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 xmlParserInputPtr input = ctxt->input;
5948
5949 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005950 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5952 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005953 }
5954 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005955 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005956 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005957 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5958 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005959 return(-1);
5960 }
5961 while ((RAW == 0) && (ctxt->inputNr > 1))
5962 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005963 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005964 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5965 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005966 }
5967 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005968 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005969 SKIP(5);
5970 /*
5971 * Element must always be empty.
5972 */
5973 ret = XML_ELEMENT_TYPE_EMPTY;
5974 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5975 (NXT(2) == 'Y')) {
5976 SKIP(3);
5977 /*
5978 * Element is a generic container.
5979 */
5980 ret = XML_ELEMENT_TYPE_ANY;
5981 } else if (RAW == '(') {
5982 ret = xmlParseElementContentDecl(ctxt, name, &content);
5983 } else {
5984 /*
5985 * [ WFC: PEs in Internal Subset ] error handling.
5986 */
5987 if ((RAW == '%') && (ctxt->external == 0) &&
5988 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005989 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005990 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005991 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005992 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005993 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5994 }
Owen Taylor3473f882001-02-23 17:55:21 +00005995 return(-1);
5996 }
5997
5998 SKIP_BLANKS;
5999 /*
6000 * Pop-up of finished entities.
6001 */
6002 while ((RAW == 0) && (ctxt->inputNr > 1))
6003 xmlPopInput(ctxt);
6004 SKIP_BLANKS;
6005
6006 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006007 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006008 if (content != NULL) {
6009 xmlFreeDocElementContent(ctxt->myDoc, content);
6010 }
Owen Taylor3473f882001-02-23 17:55:21 +00006011 } else {
6012 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006013 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6014 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006015 }
6016
6017 NEXT;
6018 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006019 (ctxt->sax->elementDecl != NULL)) {
6020 if (content != NULL)
6021 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006022 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6023 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006024 if ((content != NULL) && (content->parent == NULL)) {
6025 /*
6026 * this is a trick: if xmlAddElementDecl is called,
6027 * instead of copying the full tree it is plugged directly
6028 * if called from the parser. Avoid duplicating the
6029 * interfaces or change the API/ABI
6030 */
6031 xmlFreeDocElementContent(ctxt->myDoc, content);
6032 }
6033 } else if (content != NULL) {
6034 xmlFreeDocElementContent(ctxt->myDoc, content);
6035 }
Owen Taylor3473f882001-02-23 17:55:21 +00006036 }
Owen Taylor3473f882001-02-23 17:55:21 +00006037 }
6038 return(ret);
6039}
6040
6041/**
Owen Taylor3473f882001-02-23 17:55:21 +00006042 * xmlParseConditionalSections
6043 * @ctxt: an XML parser context
6044 *
6045 * [61] conditionalSect ::= includeSect | ignoreSect
6046 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6047 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6048 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6049 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6050 */
6051
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006052static void
Owen Taylor3473f882001-02-23 17:55:21 +00006053xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6054 SKIP(3);
6055 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006056 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006057 SKIP(7);
6058 SKIP_BLANKS;
6059 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006060 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 } else {
6062 NEXT;
6063 }
6064 if (xmlParserDebugEntities) {
6065 if ((ctxt->input != NULL) && (ctxt->input->filename))
6066 xmlGenericError(xmlGenericErrorContext,
6067 "%s(%d): ", ctxt->input->filename,
6068 ctxt->input->line);
6069 xmlGenericError(xmlGenericErrorContext,
6070 "Entering INCLUDE Conditional Section\n");
6071 }
6072
6073 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6074 (NXT(2) != '>'))) {
6075 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006076 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006077
6078 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6079 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006080 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006081 NEXT;
6082 } else if (RAW == '%') {
6083 xmlParsePEReference(ctxt);
6084 } else
6085 xmlParseMarkupDecl(ctxt);
6086
6087 /*
6088 * Pop-up of finished entities.
6089 */
6090 while ((RAW == 0) && (ctxt->inputNr > 1))
6091 xmlPopInput(ctxt);
6092
Daniel Veillardfdc91562002-07-01 21:52:03 +00006093 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006094 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006095 break;
6096 }
6097 }
6098 if (xmlParserDebugEntities) {
6099 if ((ctxt->input != NULL) && (ctxt->input->filename))
6100 xmlGenericError(xmlGenericErrorContext,
6101 "%s(%d): ", ctxt->input->filename,
6102 ctxt->input->line);
6103 xmlGenericError(xmlGenericErrorContext,
6104 "Leaving INCLUDE Conditional Section\n");
6105 }
6106
Daniel Veillarda07050d2003-10-19 14:46:32 +00006107 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006108 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006109 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006110 int depth = 0;
6111
6112 SKIP(6);
6113 SKIP_BLANKS;
6114 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006115 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006116 } else {
6117 NEXT;
6118 }
6119 if (xmlParserDebugEntities) {
6120 if ((ctxt->input != NULL) && (ctxt->input->filename))
6121 xmlGenericError(xmlGenericErrorContext,
6122 "%s(%d): ", ctxt->input->filename,
6123 ctxt->input->line);
6124 xmlGenericError(xmlGenericErrorContext,
6125 "Entering IGNORE Conditional Section\n");
6126 }
6127
6128 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006129 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006130 * But disable SAX event generating DTD building in the meantime
6131 */
6132 state = ctxt->disableSAX;
6133 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006134 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006135 ctxt->instate = XML_PARSER_IGNORE;
6136
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006137 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006138 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6139 depth++;
6140 SKIP(3);
6141 continue;
6142 }
6143 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6144 if (--depth >= 0) SKIP(3);
6145 continue;
6146 }
6147 NEXT;
6148 continue;
6149 }
6150
6151 ctxt->disableSAX = state;
6152 ctxt->instate = instate;
6153
6154 if (xmlParserDebugEntities) {
6155 if ((ctxt->input != NULL) && (ctxt->input->filename))
6156 xmlGenericError(xmlGenericErrorContext,
6157 "%s(%d): ", ctxt->input->filename,
6158 ctxt->input->line);
6159 xmlGenericError(xmlGenericErrorContext,
6160 "Leaving IGNORE Conditional Section\n");
6161 }
6162
6163 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006164 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
6166
6167 if (RAW == 0)
6168 SHRINK;
6169
6170 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006171 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 } else {
6173 SKIP(3);
6174 }
6175}
6176
6177/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006178 * xmlParseMarkupDecl:
6179 * @ctxt: an XML parser context
6180 *
6181 * parse Markup declarations
6182 *
6183 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6184 * NotationDecl | PI | Comment
6185 *
6186 * [ VC: Proper Declaration/PE Nesting ]
6187 * Parameter-entity replacement text must be properly nested with
6188 * markup declarations. That is to say, if either the first character
6189 * or the last character of a markup declaration (markupdecl above) is
6190 * contained in the replacement text for a parameter-entity reference,
6191 * both must be contained in the same replacement text.
6192 *
6193 * [ WFC: PEs in Internal Subset ]
6194 * In the internal DTD subset, parameter-entity references can occur
6195 * only where markup declarations can occur, not within markup declarations.
6196 * (This does not apply to references that occur in external parameter
6197 * entities or to the external subset.)
6198 */
6199void
6200xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6201 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006202 if (CUR == '<') {
6203 if (NXT(1) == '!') {
6204 switch (NXT(2)) {
6205 case 'E':
6206 if (NXT(3) == 'L')
6207 xmlParseElementDecl(ctxt);
6208 else if (NXT(3) == 'N')
6209 xmlParseEntityDecl(ctxt);
6210 break;
6211 case 'A':
6212 xmlParseAttributeListDecl(ctxt);
6213 break;
6214 case 'N':
6215 xmlParseNotationDecl(ctxt);
6216 break;
6217 case '-':
6218 xmlParseComment(ctxt);
6219 break;
6220 default:
6221 /* there is an error but it will be detected later */
6222 break;
6223 }
6224 } else if (NXT(1) == '?') {
6225 xmlParsePI(ctxt);
6226 }
6227 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006228 /*
6229 * This is only for internal subset. On external entities,
6230 * the replacement is done before parsing stage
6231 */
6232 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6233 xmlParsePEReference(ctxt);
6234
6235 /*
6236 * Conditional sections are allowed from entities included
6237 * by PE References in the internal subset.
6238 */
6239 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6240 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6241 xmlParseConditionalSections(ctxt);
6242 }
6243 }
6244
6245 ctxt->instate = XML_PARSER_DTD;
6246}
6247
6248/**
6249 * xmlParseTextDecl:
6250 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006251 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006252 * parse an XML declaration header for external entities
6253 *
6254 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006255 */
6256
6257void
6258xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6259 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006260 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006261
6262 /*
6263 * We know that '<?xml' is here.
6264 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006265 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006266 SKIP(5);
6267 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006268 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006269 return;
6270 }
6271
William M. Brack76e95df2003-10-18 16:20:14 +00006272 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006273 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6274 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006275 }
6276 SKIP_BLANKS;
6277
6278 /*
6279 * We may have the VersionInfo here.
6280 */
6281 version = xmlParseVersionInfo(ctxt);
6282 if (version == NULL)
6283 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006284 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006285 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006286 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6287 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006288 }
6289 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006290 ctxt->input->version = version;
6291
6292 /*
6293 * We must have the encoding declaration
6294 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006295 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006296 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6297 /*
6298 * The XML REC instructs us to stop parsing right here
6299 */
6300 return;
6301 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006302 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6303 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6304 "Missing encoding in text declaration\n");
6305 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006306
6307 SKIP_BLANKS;
6308 if ((RAW == '?') && (NXT(1) == '>')) {
6309 SKIP(2);
6310 } else if (RAW == '>') {
6311 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006312 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006313 NEXT;
6314 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006315 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006316 MOVETO_ENDTAG(CUR_PTR);
6317 NEXT;
6318 }
6319}
6320
6321/**
Owen Taylor3473f882001-02-23 17:55:21 +00006322 * xmlParseExternalSubset:
6323 * @ctxt: an XML parser context
6324 * @ExternalID: the external identifier
6325 * @SystemID: the system identifier (or URL)
6326 *
6327 * parse Markup declarations from an external subset
6328 *
6329 * [30] extSubset ::= textDecl? extSubsetDecl
6330 *
6331 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6332 */
6333void
6334xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6335 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006336 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006338
6339 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6340 (ctxt->input->end - ctxt->input->cur >= 4)) {
6341 xmlChar start[4];
6342 xmlCharEncoding enc;
6343
6344 start[0] = RAW;
6345 start[1] = NXT(1);
6346 start[2] = NXT(2);
6347 start[3] = NXT(3);
6348 enc = xmlDetectCharEncoding(start, 4);
6349 if (enc != XML_CHAR_ENCODING_NONE)
6350 xmlSwitchEncoding(ctxt, enc);
6351 }
6352
Daniel Veillarda07050d2003-10-19 14:46:32 +00006353 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006354 xmlParseTextDecl(ctxt);
6355 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6356 /*
6357 * The XML REC instructs us to stop parsing right here
6358 */
6359 ctxt->instate = XML_PARSER_EOF;
6360 return;
6361 }
6362 }
6363 if (ctxt->myDoc == NULL) {
6364 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6365 }
6366 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6367 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6368
6369 ctxt->instate = XML_PARSER_DTD;
6370 ctxt->external = 1;
6371 while (((RAW == '<') && (NXT(1) == '?')) ||
6372 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006373 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006374 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006375 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006376
6377 GROW;
6378 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6379 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006380 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006381 NEXT;
6382 } else if (RAW == '%') {
6383 xmlParsePEReference(ctxt);
6384 } else
6385 xmlParseMarkupDecl(ctxt);
6386
6387 /*
6388 * Pop-up of finished entities.
6389 */
6390 while ((RAW == 0) && (ctxt->inputNr > 1))
6391 xmlPopInput(ctxt);
6392
Daniel Veillardfdc91562002-07-01 21:52:03 +00006393 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006394 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006395 break;
6396 }
6397 }
6398
6399 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006400 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
6402
6403}
6404
6405/**
6406 * xmlParseReference:
6407 * @ctxt: an XML parser context
6408 *
6409 * parse and handle entity references in content, depending on the SAX
6410 * interface, this may end-up in a call to character() if this is a
6411 * CharRef, a predefined entity, if there is no reference() callback.
6412 * or if the parser was asked to switch to that mode.
6413 *
6414 * [67] Reference ::= EntityRef | CharRef
6415 */
6416void
6417xmlParseReference(xmlParserCtxtPtr ctxt) {
6418 xmlEntityPtr ent;
6419 xmlChar *val;
6420 if (RAW != '&') return;
6421
6422 if (NXT(1) == '#') {
6423 int i = 0;
6424 xmlChar out[10];
6425 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006426 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006427
Daniel Veillarddc171602008-03-26 17:41:38 +00006428 if (value == 0)
6429 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006430 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6431 /*
6432 * So we are using non-UTF-8 buffers
6433 * Check that the char fit on 8bits, if not
6434 * generate a CharRef.
6435 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006436 if (value <= 0xFF) {
6437 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006438 out[1] = 0;
6439 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6440 (!ctxt->disableSAX))
6441 ctxt->sax->characters(ctxt->userData, out, 1);
6442 } else {
6443 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006444 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006445 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006446 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006447 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6448 (!ctxt->disableSAX))
6449 ctxt->sax->reference(ctxt->userData, out);
6450 }
6451 } else {
6452 /*
6453 * Just encode the value in UTF-8
6454 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006455 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006456 out[i] = 0;
6457 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6458 (!ctxt->disableSAX))
6459 ctxt->sax->characters(ctxt->userData, out, i);
6460 }
6461 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006462 int was_checked;
6463
Owen Taylor3473f882001-02-23 17:55:21 +00006464 ent = xmlParseEntityRef(ctxt);
6465 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006466 if (!ctxt->wellFormed)
6467 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006468 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006469 if ((ent->name != NULL) &&
6470 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6471 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006472 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006473
6474
6475 /*
6476 * The first reference to the entity trigger a parsing phase
6477 * where the ent->children is filled with the result from
6478 * the parsing.
6479 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006480 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006481 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006482
Owen Taylor3473f882001-02-23 17:55:21 +00006483 value = ent->content;
6484
6485 /*
6486 * Check that this entity is well formed
6487 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006488 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006489 (value[1] == 0) && (value[0] == '<') &&
6490 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6491 /*
6492 * DONE: get definite answer on this !!!
6493 * Lots of entity decls are used to declare a single
6494 * char
6495 * <!ENTITY lt "<">
6496 * Which seems to be valid since
6497 * 2.4: The ampersand character (&) and the left angle
6498 * bracket (<) may appear in their literal form only
6499 * when used ... They are also legal within the literal
6500 * entity value of an internal entity declaration;i
6501 * see "4.3.2 Well-Formed Parsed Entities".
6502 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6503 * Looking at the OASIS test suite and James Clark
6504 * tests, this is broken. However the XML REC uses
6505 * it. Is the XML REC not well-formed ????
6506 * This is a hack to avoid this problem
6507 *
6508 * ANSWER: since lt gt amp .. are already defined,
6509 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006510 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006511 * is lousy but acceptable.
6512 */
6513 list = xmlNewDocText(ctxt->myDoc, value);
6514 if (list != NULL) {
6515 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6516 (ent->children == NULL)) {
6517 ent->children = list;
6518 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006519 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006520 list->parent = (xmlNodePtr) ent;
6521 } else {
6522 xmlFreeNodeList(list);
6523 }
6524 } else if (list != NULL) {
6525 xmlFreeNodeList(list);
6526 }
6527 } else {
6528 /*
6529 * 4.3.2: An internal general parsed entity is well-formed
6530 * if its replacement text matches the production labeled
6531 * content.
6532 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006533
6534 void *user_data;
6535 /*
6536 * This is a bit hackish but this seems the best
6537 * way to make sure both SAX and DOM entity support
6538 * behaves okay.
6539 */
6540 if (ctxt->userData == ctxt)
6541 user_data = NULL;
6542 else
6543 user_data = ctxt->userData;
6544
Owen Taylor3473f882001-02-23 17:55:21 +00006545 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6546 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006547 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6548 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006549 ctxt->depth--;
6550 } else if (ent->etype ==
6551 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6552 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006553 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006554 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006555 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006556 ctxt->depth--;
6557 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006558 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006559 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6560 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 }
6562 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006563 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006564 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006565 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006566 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6567 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006568 (ent->children == NULL)) {
6569 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006570 if (ctxt->replaceEntities) {
6571 /*
6572 * Prune it directly in the generated document
6573 * except for single text nodes.
6574 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006575 if (((list->type == XML_TEXT_NODE) &&
6576 (list->next == NULL)) ||
6577 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006578 list->parent = (xmlNodePtr) ent;
6579 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006580 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006581 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006582 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006583 while (list != NULL) {
6584 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006585 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006586 if (list->next == NULL)
6587 ent->last = list;
6588 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006589 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006590 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006591#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006592 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6593 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006594#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006595 }
6596 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006597 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006598 while (list != NULL) {
6599 list->parent = (xmlNodePtr) ent;
6600 if (list->next == NULL)
6601 ent->last = list;
6602 list = list->next;
6603 }
Owen Taylor3473f882001-02-23 17:55:21 +00006604 }
6605 } else {
6606 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006607 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006608 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006609 } else if ((ret != XML_ERR_OK) &&
6610 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006611 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6612 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006613 } else if (list != NULL) {
6614 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006615 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006616 }
6617 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006618 ent->checked = 1;
6619 }
6620
6621 if (ent->children == NULL) {
6622 /*
6623 * Probably running in SAX mode and the callbacks don't
6624 * build the entity content. So unless we already went
6625 * though parsing for first checking go though the entity
6626 * content to generate callbacks associated to the entity
6627 */
6628 if (was_checked == 1) {
6629 void *user_data;
6630 /*
6631 * This is a bit hackish but this seems the best
6632 * way to make sure both SAX and DOM entity support
6633 * behaves okay.
6634 */
6635 if (ctxt->userData == ctxt)
6636 user_data = NULL;
6637 else
6638 user_data = ctxt->userData;
6639
6640 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6641 ctxt->depth++;
6642 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6643 ent->content, user_data, NULL);
6644 ctxt->depth--;
6645 } else if (ent->etype ==
6646 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6647 ctxt->depth++;
6648 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6649 ctxt->sax, user_data, ctxt->depth,
6650 ent->URI, ent->ExternalID, NULL);
6651 ctxt->depth--;
6652 } else {
6653 ret = XML_ERR_ENTITY_PE_INTERNAL;
6654 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6655 "invalid entity type found\n", NULL);
6656 }
6657 if (ret == XML_ERR_ENTITY_LOOP) {
6658 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6659 return;
6660 }
6661 }
6662 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6663 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6664 /*
6665 * Entity reference callback comes second, it's somewhat
6666 * superfluous but a compatibility to historical behaviour
6667 */
6668 ctxt->sax->reference(ctxt->userData, ent->name);
6669 }
6670 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006671 }
6672 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006673 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006674 /*
6675 * Create a node.
6676 */
6677 ctxt->sax->reference(ctxt->userData, ent->name);
6678 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006679 }
6680 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006681 /*
6682 * There is a problem on the handling of _private for entities
6683 * (bug 155816): Should we copy the content of the field from
6684 * the entity (possibly overwriting some value set by the user
6685 * when a copy is created), should we leave it alone, or should
6686 * we try to take care of different situations? The problem
6687 * is exacerbated by the usage of this field by the xmlReader.
6688 * To fix this bug, we look at _private on the created node
6689 * and, if it's NULL, we copy in whatever was in the entity.
6690 * If it's not NULL we leave it alone. This is somewhat of a
6691 * hack - maybe we should have further tests to determine
6692 * what to do.
6693 */
Owen Taylor3473f882001-02-23 17:55:21 +00006694 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6695 /*
6696 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006697 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006698 * In the first occurrence list contains the replacement.
6699 * progressive == 2 means we are operating on the Reader
6700 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006701 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006702 if (((list == NULL) && (ent->owner == 0)) ||
6703 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006704 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006705
6706 /*
6707 * when operating on a reader, the entities definitions
6708 * are always owning the entities subtree.
6709 if (ctxt->parseMode == XML_PARSE_READER)
6710 ent->owner = 1;
6711 */
6712
Daniel Veillard62f313b2001-07-04 19:49:14 +00006713 cur = ent->children;
6714 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006715 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006716 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006717 if (nw->_private == NULL)
6718 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006719 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006720 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006721 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006722 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006723 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006724 if (cur == ent->last) {
6725 /*
6726 * needed to detect some strange empty
6727 * node cases in the reader tests
6728 */
6729 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006730 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006731 (nw->type == XML_ELEMENT_NODE) &&
6732 (nw->children == NULL))
6733 nw->extra = 1;
6734
Daniel Veillard62f313b2001-07-04 19:49:14 +00006735 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006736 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006737 cur = cur->next;
6738 }
Daniel Veillard81273902003-09-30 00:43:48 +00006739#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006740 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006741 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006742#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006743 } else if (list == NULL) {
6744 xmlNodePtr nw = NULL, cur, next, last,
6745 firstChild = NULL;
6746 /*
6747 * Copy the entity child list and make it the new
6748 * entity child list. The goal is to make sure any
6749 * ID or REF referenced will be the one from the
6750 * document content and not the entity copy.
6751 */
6752 cur = ent->children;
6753 ent->children = NULL;
6754 last = ent->last;
6755 ent->last = NULL;
6756 while (cur != NULL) {
6757 next = cur->next;
6758 cur->next = NULL;
6759 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006760 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006761 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006762 if (nw->_private == NULL)
6763 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006764 if (firstChild == NULL){
6765 firstChild = cur;
6766 }
6767 xmlAddChild((xmlNodePtr) ent, nw);
6768 xmlAddChild(ctxt->node, cur);
6769 }
6770 if (cur == last)
6771 break;
6772 cur = next;
6773 }
6774 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006775#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006776 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6777 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006778#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006779 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006780 const xmlChar *nbktext;
6781
Daniel Veillard62f313b2001-07-04 19:49:14 +00006782 /*
6783 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006784 * node with a possible previous text one which
6785 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006786 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006787 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6788 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006789 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006790 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006791 if ((ent->last != ent->children) &&
6792 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006793 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006794 xmlAddChildList(ctxt->node, ent->children);
6795 }
6796
Owen Taylor3473f882001-02-23 17:55:21 +00006797 /*
6798 * This is to avoid a nasty side effect, see
6799 * characters() in SAX.c
6800 */
6801 ctxt->nodemem = 0;
6802 ctxt->nodelen = 0;
6803 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006804 }
6805 }
6806 } else {
6807 val = ent->content;
6808 if (val == NULL) return;
6809 /*
6810 * inline the entity.
6811 */
6812 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6813 (!ctxt->disableSAX))
6814 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6815 }
6816 }
6817}
6818
6819/**
6820 * xmlParseEntityRef:
6821 * @ctxt: an XML parser context
6822 *
6823 * parse ENTITY references declarations
6824 *
6825 * [68] EntityRef ::= '&' Name ';'
6826 *
6827 * [ WFC: Entity Declared ]
6828 * In a document without any DTD, a document with only an internal DTD
6829 * subset which contains no parameter entity references, or a document
6830 * with "standalone='yes'", the Name given in the entity reference
6831 * must match that in an entity declaration, except that well-formed
6832 * documents need not declare any of the following entities: amp, lt,
6833 * gt, apos, quot. The declaration of a parameter entity must precede
6834 * any reference to it. Similarly, the declaration of a general entity
6835 * must precede any reference to it which appears in a default value in an
6836 * attribute-list declaration. Note that if entities are declared in the
6837 * external subset or in external parameter entities, a non-validating
6838 * processor is not obligated to read and process their declarations;
6839 * for such documents, the rule that an entity must be declared is a
6840 * well-formedness constraint only if standalone='yes'.
6841 *
6842 * [ WFC: Parsed Entity ]
6843 * An entity reference must not contain the name of an unparsed entity
6844 *
6845 * Returns the xmlEntityPtr if found, or NULL otherwise.
6846 */
6847xmlEntityPtr
6848xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006849 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006850 xmlEntityPtr ent = NULL;
6851
6852 GROW;
6853
6854 if (RAW == '&') {
6855 NEXT;
6856 name = xmlParseName(ctxt);
6857 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006858 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6859 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006860 } else {
6861 if (RAW == ';') {
6862 NEXT;
6863 /*
6864 * Ask first SAX for entity resolution, otherwise try the
6865 * predefined set.
6866 */
6867 if (ctxt->sax != NULL) {
6868 if (ctxt->sax->getEntity != NULL)
6869 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006870 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006871 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006872 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6873 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006874 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006875 }
Owen Taylor3473f882001-02-23 17:55:21 +00006876 }
6877 /*
6878 * [ WFC: Entity Declared ]
6879 * In a document without any DTD, a document with only an
6880 * internal DTD subset which contains no parameter entity
6881 * references, or a document with "standalone='yes'", the
6882 * Name given in the entity reference must match that in an
6883 * entity declaration, except that well-formed documents
6884 * need not declare any of the following entities: amp, lt,
6885 * gt, apos, quot.
6886 * The declaration of a parameter entity must precede any
6887 * reference to it.
6888 * Similarly, the declaration of a general entity must
6889 * precede any reference to it which appears in a default
6890 * value in an attribute-list declaration. Note that if
6891 * entities are declared in the external subset or in
6892 * external parameter entities, a non-validating processor
6893 * is not obligated to read and process their declarations;
6894 * for such documents, the rule that an entity must be
6895 * declared is a well-formedness constraint only if
6896 * standalone='yes'.
6897 */
6898 if (ent == NULL) {
6899 if ((ctxt->standalone == 1) ||
6900 ((ctxt->hasExternalSubset == 0) &&
6901 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006902 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006903 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006904 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006905 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006906 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006907 if ((ctxt->inSubset == 0) &&
6908 (ctxt->sax != NULL) &&
6909 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006910 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006911 }
Owen Taylor3473f882001-02-23 17:55:21 +00006912 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006913 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006914 }
6915
6916 /*
6917 * [ WFC: Parsed Entity ]
6918 * An entity reference must not contain the name of an
6919 * unparsed entity
6920 */
6921 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006922 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006923 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006924 }
6925
6926 /*
6927 * [ WFC: No External Entity References ]
6928 * Attribute values cannot contain direct or indirect
6929 * entity references to external entities.
6930 */
6931 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6932 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006933 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6934 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006935 }
6936 /*
6937 * [ WFC: No < in Attribute Values ]
6938 * The replacement text of any entity referred to directly or
6939 * indirectly in an attribute value (other than "&lt;") must
6940 * not contain a <.
6941 */
6942 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6943 (ent != NULL) &&
6944 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6945 (ent->content != NULL) &&
6946 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006947 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006948 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006949 }
6950
6951 /*
6952 * Internal check, no parameter entities here ...
6953 */
6954 else {
6955 switch (ent->etype) {
6956 case XML_INTERNAL_PARAMETER_ENTITY:
6957 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006958 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6959 "Attempt to reference the parameter entity '%s'\n",
6960 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006961 break;
6962 default:
6963 break;
6964 }
6965 }
6966
6967 /*
6968 * [ WFC: No Recursion ]
6969 * A parsed entity must not contain a recursive reference
6970 * to itself, either directly or indirectly.
6971 * Done somewhere else
6972 */
6973
6974 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006975 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006976 }
Owen Taylor3473f882001-02-23 17:55:21 +00006977 }
6978 }
6979 return(ent);
6980}
6981
6982/**
6983 * xmlParseStringEntityRef:
6984 * @ctxt: an XML parser context
6985 * @str: a pointer to an index in the string
6986 *
6987 * parse ENTITY references declarations, but this version parses it from
6988 * a string value.
6989 *
6990 * [68] EntityRef ::= '&' Name ';'
6991 *
6992 * [ WFC: Entity Declared ]
6993 * In a document without any DTD, a document with only an internal DTD
6994 * subset which contains no parameter entity references, or a document
6995 * with "standalone='yes'", the Name given in the entity reference
6996 * must match that in an entity declaration, except that well-formed
6997 * documents need not declare any of the following entities: amp, lt,
6998 * gt, apos, quot. The declaration of a parameter entity must precede
6999 * any reference to it. Similarly, the declaration of a general entity
7000 * must precede any reference to it which appears in a default value in an
7001 * attribute-list declaration. Note that if entities are declared in the
7002 * external subset or in external parameter entities, a non-validating
7003 * processor is not obligated to read and process their declarations;
7004 * for such documents, the rule that an entity must be declared is a
7005 * well-formedness constraint only if standalone='yes'.
7006 *
7007 * [ WFC: Parsed Entity ]
7008 * An entity reference must not contain the name of an unparsed entity
7009 *
7010 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7011 * is updated to the current location in the string.
7012 */
7013xmlEntityPtr
7014xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7015 xmlChar *name;
7016 const xmlChar *ptr;
7017 xmlChar cur;
7018 xmlEntityPtr ent = NULL;
7019
7020 if ((str == NULL) || (*str == NULL))
7021 return(NULL);
7022 ptr = *str;
7023 cur = *ptr;
7024 if (cur == '&') {
7025 ptr++;
7026 cur = *ptr;
7027 name = xmlParseStringName(ctxt, &ptr);
7028 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007029 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7030 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007031 } else {
7032 if (*ptr == ';') {
7033 ptr++;
7034 /*
7035 * Ask first SAX for entity resolution, otherwise try the
7036 * predefined set.
7037 */
7038 if (ctxt->sax != NULL) {
7039 if (ctxt->sax->getEntity != NULL)
7040 ent = ctxt->sax->getEntity(ctxt->userData, name);
7041 if (ent == NULL)
7042 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007043 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00007044 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007045 }
Owen Taylor3473f882001-02-23 17:55:21 +00007046 }
7047 /*
7048 * [ WFC: Entity Declared ]
7049 * In a document without any DTD, a document with only an
7050 * internal DTD subset which contains no parameter entity
7051 * references, or a document with "standalone='yes'", the
7052 * Name given in the entity reference must match that in an
7053 * entity declaration, except that well-formed documents
7054 * need not declare any of the following entities: amp, lt,
7055 * gt, apos, quot.
7056 * The declaration of a parameter entity must precede any
7057 * reference to it.
7058 * Similarly, the declaration of a general entity must
7059 * precede any reference to it which appears in a default
7060 * value in an attribute-list declaration. Note that if
7061 * entities are declared in the external subset or in
7062 * external parameter entities, a non-validating processor
7063 * is not obligated to read and process their declarations;
7064 * for such documents, the rule that an entity must be
7065 * declared is a well-formedness constraint only if
7066 * standalone='yes'.
7067 */
7068 if (ent == NULL) {
7069 if ((ctxt->standalone == 1) ||
7070 ((ctxt->hasExternalSubset == 0) &&
7071 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007072 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007073 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007074 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007075 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00007076 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007077 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007078 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00007079 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00007080 }
7081
7082 /*
7083 * [ WFC: Parsed Entity ]
7084 * An entity reference must not contain the name of an
7085 * unparsed entity
7086 */
7087 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007088 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007089 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007090 }
7091
7092 /*
7093 * [ WFC: No External Entity References ]
7094 * Attribute values cannot contain direct or indirect
7095 * entity references to external entities.
7096 */
7097 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7098 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007099 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00007100 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007101 }
7102 /*
7103 * [ WFC: No < in Attribute Values ]
7104 * The replacement text of any entity referred to directly or
7105 * indirectly in an attribute value (other than "&lt;") must
7106 * not contain a <.
7107 */
7108 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7109 (ent != NULL) &&
7110 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7111 (ent->content != NULL) &&
7112 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007113 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7114 "'<' in entity '%s' is not allowed in attributes values\n",
7115 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007116 }
7117
7118 /*
7119 * Internal check, no parameter entities here ...
7120 */
7121 else {
7122 switch (ent->etype) {
7123 case XML_INTERNAL_PARAMETER_ENTITY:
7124 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00007125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7126 "Attempt to reference the parameter entity '%s'\n",
7127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 break;
7129 default:
7130 break;
7131 }
7132 }
7133
7134 /*
7135 * [ WFC: No Recursion ]
7136 * A parsed entity must not contain a recursive reference
7137 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007138 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00007139 */
7140
7141 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007142 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007143 }
7144 xmlFree(name);
7145 }
7146 }
7147 *str = ptr;
7148 return(ent);
7149}
7150
7151/**
7152 * xmlParsePEReference:
7153 * @ctxt: an XML parser context
7154 *
7155 * parse PEReference declarations
7156 * The entity content is handled directly by pushing it's content as
7157 * a new input stream.
7158 *
7159 * [69] PEReference ::= '%' Name ';'
7160 *
7161 * [ WFC: No Recursion ]
7162 * A parsed entity must not contain a recursive
7163 * reference to itself, either directly or indirectly.
7164 *
7165 * [ WFC: Entity Declared ]
7166 * In a document without any DTD, a document with only an internal DTD
7167 * subset which contains no parameter entity references, or a document
7168 * with "standalone='yes'", ... ... The declaration of a parameter
7169 * entity must precede any reference to it...
7170 *
7171 * [ VC: Entity Declared ]
7172 * In a document with an external subset or external parameter entities
7173 * with "standalone='no'", ... ... The declaration of a parameter entity
7174 * must precede any reference to it...
7175 *
7176 * [ WFC: In DTD ]
7177 * Parameter-entity references may only appear in the DTD.
7178 * NOTE: misleading but this is handled.
7179 */
7180void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007181xmlParsePEReference(xmlParserCtxtPtr ctxt)
7182{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007183 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007184 xmlEntityPtr entity = NULL;
7185 xmlParserInputPtr input;
7186
7187 if (RAW == '%') {
7188 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00007189 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00007190 if (name == NULL) {
7191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7192 "xmlParsePEReference: no name\n");
7193 } else {
7194 if (RAW == ';') {
7195 NEXT;
7196 if ((ctxt->sax != NULL) &&
7197 (ctxt->sax->getParameterEntity != NULL))
7198 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7199 name);
7200 if (entity == NULL) {
7201 /*
7202 * [ WFC: Entity Declared ]
7203 * In a document without any DTD, a document with only an
7204 * internal DTD subset which contains no parameter entity
7205 * references, or a document with "standalone='yes'", ...
7206 * ... The declaration of a parameter entity must precede
7207 * any reference to it...
7208 */
7209 if ((ctxt->standalone == 1) ||
7210 ((ctxt->hasExternalSubset == 0) &&
7211 (ctxt->hasPErefs == 0))) {
7212 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7213 "PEReference: %%%s; not found\n",
7214 name);
7215 } else {
7216 /*
7217 * [ VC: Entity Declared ]
7218 * In a document with an external subset or external
7219 * parameter entities with "standalone='no'", ...
7220 * ... The declaration of a parameter entity must
7221 * precede any reference to it...
7222 */
7223 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7224 "PEReference: %%%s; not found\n",
7225 name, NULL);
7226 ctxt->valid = 0;
7227 }
7228 } else {
7229 /*
7230 * Internal checking in case the entity quest barfed
7231 */
7232 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7233 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7234 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7235 "Internal: %%%s; is not a parameter entity\n",
7236 name, NULL);
7237 } else if (ctxt->input->free != deallocblankswrapper) {
7238 input =
7239 xmlNewBlanksWrapperInputStream(ctxt, entity);
7240 xmlPushInput(ctxt, input);
7241 } else {
7242 /*
7243 * TODO !!!
7244 * handle the extra spaces added before and after
7245 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7246 */
7247 input = xmlNewEntityInputStream(ctxt, entity);
7248 xmlPushInput(ctxt, input);
7249 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00007250 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00007251 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00007252 xmlParseTextDecl(ctxt);
7253 if (ctxt->errNo ==
7254 XML_ERR_UNSUPPORTED_ENCODING) {
7255 /*
7256 * The XML REC instructs us to stop parsing
7257 * right here
7258 */
7259 ctxt->instate = XML_PARSER_EOF;
7260 return;
7261 }
7262 }
7263 }
7264 }
7265 ctxt->hasPErefs = 1;
7266 } else {
7267 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7268 }
7269 }
Owen Taylor3473f882001-02-23 17:55:21 +00007270 }
7271}
7272
7273/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007274 * xmlLoadEntityContent:
7275 * @ctxt: an XML parser context
7276 * @entity: an unloaded system entity
7277 *
7278 * Load the original content of the given system entity from the
7279 * ExternalID/SystemID given. This is to be used for Included in Literal
7280 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7281 *
7282 * Returns 0 in case of success and -1 in case of failure
7283 */
7284static int
7285xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7286 xmlParserInputPtr input;
7287 xmlBufferPtr buf;
7288 int l, c;
7289 int count = 0;
7290
7291 if ((ctxt == NULL) || (entity == NULL) ||
7292 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7293 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7294 (entity->content != NULL)) {
7295 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7296 "xmlLoadEntityContent parameter error");
7297 return(-1);
7298 }
7299
7300 if (xmlParserDebugEntities)
7301 xmlGenericError(xmlGenericErrorContext,
7302 "Reading %s entity content input\n", entity->name);
7303
7304 buf = xmlBufferCreate();
7305 if (buf == NULL) {
7306 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7307 "xmlLoadEntityContent parameter error");
7308 return(-1);
7309 }
7310
7311 input = xmlNewEntityInputStream(ctxt, entity);
7312 if (input == NULL) {
7313 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7314 "xmlLoadEntityContent input error");
7315 xmlBufferFree(buf);
7316 return(-1);
7317 }
7318
7319 /*
7320 * Push the entity as the current input, read char by char
7321 * saving to the buffer until the end of the entity or an error
7322 */
7323 xmlPushInput(ctxt, input);
7324 GROW;
7325 c = CUR_CHAR(l);
7326 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7327 (IS_CHAR(c))) {
7328 xmlBufferAdd(buf, ctxt->input->cur, l);
7329 if (count++ > 100) {
7330 count = 0;
7331 GROW;
7332 }
7333 NEXTL(l);
7334 c = CUR_CHAR(l);
7335 }
7336
7337 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7338 xmlPopInput(ctxt);
7339 } else if (!IS_CHAR(c)) {
7340 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7341 "xmlLoadEntityContent: invalid char value %d\n",
7342 c);
7343 xmlBufferFree(buf);
7344 return(-1);
7345 }
7346 entity->content = buf->content;
7347 buf->content = NULL;
7348 xmlBufferFree(buf);
7349
7350 return(0);
7351}
7352
7353/**
Owen Taylor3473f882001-02-23 17:55:21 +00007354 * xmlParseStringPEReference:
7355 * @ctxt: an XML parser context
7356 * @str: a pointer to an index in the string
7357 *
7358 * parse PEReference declarations
7359 *
7360 * [69] PEReference ::= '%' Name ';'
7361 *
7362 * [ WFC: No Recursion ]
7363 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007364 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007365 *
7366 * [ WFC: Entity Declared ]
7367 * In a document without any DTD, a document with only an internal DTD
7368 * subset which contains no parameter entity references, or a document
7369 * with "standalone='yes'", ... ... The declaration of a parameter
7370 * entity must precede any reference to it...
7371 *
7372 * [ VC: Entity Declared ]
7373 * In a document with an external subset or external parameter entities
7374 * with "standalone='no'", ... ... The declaration of a parameter entity
7375 * must precede any reference to it...
7376 *
7377 * [ WFC: In DTD ]
7378 * Parameter-entity references may only appear in the DTD.
7379 * NOTE: misleading but this is handled.
7380 *
7381 * Returns the string of the entity content.
7382 * str is updated to the current value of the index
7383 */
7384xmlEntityPtr
7385xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7386 const xmlChar *ptr;
7387 xmlChar cur;
7388 xmlChar *name;
7389 xmlEntityPtr entity = NULL;
7390
7391 if ((str == NULL) || (*str == NULL)) return(NULL);
7392 ptr = *str;
7393 cur = *ptr;
7394 if (cur == '%') {
7395 ptr++;
7396 cur = *ptr;
7397 name = xmlParseStringName(ctxt, &ptr);
7398 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007399 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7400 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007401 } else {
7402 cur = *ptr;
7403 if (cur == ';') {
7404 ptr++;
7405 cur = *ptr;
7406 if ((ctxt->sax != NULL) &&
7407 (ctxt->sax->getParameterEntity != NULL))
7408 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7409 name);
7410 if (entity == NULL) {
7411 /*
7412 * [ WFC: Entity Declared ]
7413 * In a document without any DTD, a document with only an
7414 * internal DTD subset which contains no parameter entity
7415 * references, or a document with "standalone='yes'", ...
7416 * ... The declaration of a parameter entity must precede
7417 * any reference to it...
7418 */
7419 if ((ctxt->standalone == 1) ||
7420 ((ctxt->hasExternalSubset == 0) &&
7421 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007422 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007423 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007424 } else {
7425 /*
7426 * [ VC: Entity Declared ]
7427 * In a document with an external subset or external
7428 * parameter entities with "standalone='no'", ...
7429 * ... The declaration of a parameter entity must
7430 * precede any reference to it...
7431 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007432 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7433 "PEReference: %%%s; not found\n",
7434 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007435 ctxt->valid = 0;
7436 }
7437 } else {
7438 /*
7439 * Internal checking in case the entity quest barfed
7440 */
7441 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7442 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007443 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7444 "%%%s; is not a parameter entity\n",
7445 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007446 }
7447 }
7448 ctxt->hasPErefs = 1;
7449 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007450 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007451 }
7452 xmlFree(name);
7453 }
7454 }
7455 *str = ptr;
7456 return(entity);
7457}
7458
7459/**
7460 * xmlParseDocTypeDecl:
7461 * @ctxt: an XML parser context
7462 *
7463 * parse a DOCTYPE declaration
7464 *
7465 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7466 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7467 *
7468 * [ VC: Root Element Type ]
7469 * The Name in the document type declaration must match the element
7470 * type of the root element.
7471 */
7472
7473void
7474xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007475 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007476 xmlChar *ExternalID = NULL;
7477 xmlChar *URI = NULL;
7478
7479 /*
7480 * We know that '<!DOCTYPE' has been detected.
7481 */
7482 SKIP(9);
7483
7484 SKIP_BLANKS;
7485
7486 /*
7487 * Parse the DOCTYPE name.
7488 */
7489 name = xmlParseName(ctxt);
7490 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007491 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7492 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007493 }
7494 ctxt->intSubName = name;
7495
7496 SKIP_BLANKS;
7497
7498 /*
7499 * Check for SystemID and ExternalID
7500 */
7501 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7502
7503 if ((URI != NULL) || (ExternalID != NULL)) {
7504 ctxt->hasExternalSubset = 1;
7505 }
7506 ctxt->extSubURI = URI;
7507 ctxt->extSubSystem = ExternalID;
7508
7509 SKIP_BLANKS;
7510
7511 /*
7512 * Create and update the internal subset.
7513 */
7514 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7515 (!ctxt->disableSAX))
7516 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7517
7518 /*
7519 * Is there any internal subset declarations ?
7520 * they are handled separately in xmlParseInternalSubset()
7521 */
7522 if (RAW == '[')
7523 return;
7524
7525 /*
7526 * We should be at the end of the DOCTYPE declaration.
7527 */
7528 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007529 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007530 }
7531 NEXT;
7532}
7533
7534/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007535 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007536 * @ctxt: an XML parser context
7537 *
7538 * parse the internal subset declaration
7539 *
7540 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7541 */
7542
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007543static void
Owen Taylor3473f882001-02-23 17:55:21 +00007544xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7545 /*
7546 * Is there any DTD definition ?
7547 */
7548 if (RAW == '[') {
7549 ctxt->instate = XML_PARSER_DTD;
7550 NEXT;
7551 /*
7552 * Parse the succession of Markup declarations and
7553 * PEReferences.
7554 * Subsequence (markupdecl | PEReference | S)*
7555 */
7556 while (RAW != ']') {
7557 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007558 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007559
7560 SKIP_BLANKS;
7561 xmlParseMarkupDecl(ctxt);
7562 xmlParsePEReference(ctxt);
7563
7564 /*
7565 * Pop-up of finished entities.
7566 */
7567 while ((RAW == 0) && (ctxt->inputNr > 1))
7568 xmlPopInput(ctxt);
7569
7570 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007571 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007572 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007573 break;
7574 }
7575 }
7576 if (RAW == ']') {
7577 NEXT;
7578 SKIP_BLANKS;
7579 }
7580 }
7581
7582 /*
7583 * We should be at the end of the DOCTYPE declaration.
7584 */
7585 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007586 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007587 }
7588 NEXT;
7589}
7590
Daniel Veillard81273902003-09-30 00:43:48 +00007591#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007592/**
7593 * xmlParseAttribute:
7594 * @ctxt: an XML parser context
7595 * @value: a xmlChar ** used to store the value of the attribute
7596 *
7597 * parse an attribute
7598 *
7599 * [41] Attribute ::= Name Eq AttValue
7600 *
7601 * [ WFC: No External Entity References ]
7602 * Attribute values cannot contain direct or indirect entity references
7603 * to external entities.
7604 *
7605 * [ WFC: No < in Attribute Values ]
7606 * The replacement text of any entity referred to directly or indirectly in
7607 * an attribute value (other than "&lt;") must not contain a <.
7608 *
7609 * [ VC: Attribute Value Type ]
7610 * The attribute must have been declared; the value must be of the type
7611 * declared for it.
7612 *
7613 * [25] Eq ::= S? '=' S?
7614 *
7615 * With namespace:
7616 *
7617 * [NS 11] Attribute ::= QName Eq AttValue
7618 *
7619 * Also the case QName == xmlns:??? is handled independently as a namespace
7620 * definition.
7621 *
7622 * Returns the attribute name, and the value in *value.
7623 */
7624
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007625const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007626xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007627 const xmlChar *name;
7628 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007629
7630 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007631 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007632 name = xmlParseName(ctxt);
7633 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007634 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007635 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007636 return(NULL);
7637 }
7638
7639 /*
7640 * read the value
7641 */
7642 SKIP_BLANKS;
7643 if (RAW == '=') {
7644 NEXT;
7645 SKIP_BLANKS;
7646 val = xmlParseAttValue(ctxt);
7647 ctxt->instate = XML_PARSER_CONTENT;
7648 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007649 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007650 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007651 return(NULL);
7652 }
7653
7654 /*
7655 * Check that xml:lang conforms to the specification
7656 * No more registered as an error, just generate a warning now
7657 * since this was deprecated in XML second edition
7658 */
7659 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7660 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007661 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7662 "Malformed value for xml:lang : %s\n",
7663 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007664 }
7665 }
7666
7667 /*
7668 * Check that xml:space conforms to the specification
7669 */
7670 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7671 if (xmlStrEqual(val, BAD_CAST "default"))
7672 *(ctxt->space) = 0;
7673 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7674 *(ctxt->space) = 1;
7675 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007676 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007677"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007678 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007679 }
7680 }
7681
7682 *value = val;
7683 return(name);
7684}
7685
7686/**
7687 * xmlParseStartTag:
7688 * @ctxt: an XML parser context
7689 *
7690 * parse a start of tag either for rule element or
7691 * EmptyElement. In both case we don't parse the tag closing chars.
7692 *
7693 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7694 *
7695 * [ WFC: Unique Att Spec ]
7696 * No attribute name may appear more than once in the same start-tag or
7697 * empty-element tag.
7698 *
7699 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7700 *
7701 * [ WFC: Unique Att Spec ]
7702 * No attribute name may appear more than once in the same start-tag or
7703 * empty-element tag.
7704 *
7705 * With namespace:
7706 *
7707 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7708 *
7709 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7710 *
7711 * Returns the element name parsed
7712 */
7713
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007714const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007715xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007716 const xmlChar *name;
7717 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007718 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007719 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007720 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007721 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007722 int i;
7723
7724 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007725 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007726
7727 name = xmlParseName(ctxt);
7728 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007729 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007730 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007731 return(NULL);
7732 }
7733
7734 /*
7735 * Now parse the attributes, it ends up with the ending
7736 *
7737 * (S Attribute)* S?
7738 */
7739 SKIP_BLANKS;
7740 GROW;
7741
Daniel Veillard21a0f912001-02-25 19:54:14 +00007742 while ((RAW != '>') &&
7743 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007744 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007745 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007746 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007747
7748 attname = xmlParseAttribute(ctxt, &attvalue);
7749 if ((attname != NULL) && (attvalue != NULL)) {
7750 /*
7751 * [ WFC: Unique Att Spec ]
7752 * No attribute name may appear more than once in the same
7753 * start-tag or empty-element tag.
7754 */
7755 for (i = 0; i < nbatts;i += 2) {
7756 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007757 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007758 xmlFree(attvalue);
7759 goto failed;
7760 }
7761 }
Owen Taylor3473f882001-02-23 17:55:21 +00007762 /*
7763 * Add the pair to atts
7764 */
7765 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007766 maxatts = 22; /* allow for 10 attrs by default */
7767 atts = (const xmlChar **)
7768 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007769 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007770 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007771 if (attvalue != NULL)
7772 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007773 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007774 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007775 ctxt->atts = atts;
7776 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007777 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007778 const xmlChar **n;
7779
Owen Taylor3473f882001-02-23 17:55:21 +00007780 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007781 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007782 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007783 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007784 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007785 if (attvalue != NULL)
7786 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007787 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007788 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007789 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007790 ctxt->atts = atts;
7791 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007792 }
7793 atts[nbatts++] = attname;
7794 atts[nbatts++] = attvalue;
7795 atts[nbatts] = NULL;
7796 atts[nbatts + 1] = NULL;
7797 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007798 if (attvalue != NULL)
7799 xmlFree(attvalue);
7800 }
7801
7802failed:
7803
Daniel Veillard3772de32002-12-17 10:31:45 +00007804 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007805 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7806 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007807 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007808 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7809 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007810 }
7811 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007812 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7813 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007814 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7815 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007816 break;
7817 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007818 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007819 GROW;
7820 }
7821
7822 /*
7823 * SAX: Start of Element !
7824 */
7825 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007826 (!ctxt->disableSAX)) {
7827 if (nbatts > 0)
7828 ctxt->sax->startElement(ctxt->userData, name, atts);
7829 else
7830 ctxt->sax->startElement(ctxt->userData, name, NULL);
7831 }
Owen Taylor3473f882001-02-23 17:55:21 +00007832
7833 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007834 /* Free only the content strings */
7835 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007836 if (atts[i] != NULL)
7837 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007838 }
7839 return(name);
7840}
7841
7842/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007843 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007844 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007845 * @line: line of the start tag
7846 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007847 *
7848 * parse an end of tag
7849 *
7850 * [42] ETag ::= '</' Name S? '>'
7851 *
7852 * With namespace
7853 *
7854 * [NS 9] ETag ::= '</' QName S? '>'
7855 */
7856
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007857static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007858xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007859 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007860
7861 GROW;
7862 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007863 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007864 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007865 return;
7866 }
7867 SKIP(2);
7868
Daniel Veillard46de64e2002-05-29 08:21:33 +00007869 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007870
7871 /*
7872 * We should definitely be at the ending "S? '>'" part
7873 */
7874 GROW;
7875 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007876 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007877 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007878 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007879 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007880
7881 /*
7882 * [ WFC: Element Type Match ]
7883 * The Name in an element's end-tag must match the element type in the
7884 * start-tag.
7885 *
7886 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007887 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007888 if (name == NULL) name = BAD_CAST "unparseable";
7889 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007890 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007891 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007892 }
7893
7894 /*
7895 * SAX: End of Tag
7896 */
7897 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7898 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007899 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007900
Daniel Veillarde57ec792003-09-10 10:50:59 +00007901 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007902 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007903 return;
7904}
7905
7906/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007907 * xmlParseEndTag:
7908 * @ctxt: an XML parser context
7909 *
7910 * parse an end of tag
7911 *
7912 * [42] ETag ::= '</' Name S? '>'
7913 *
7914 * With namespace
7915 *
7916 * [NS 9] ETag ::= '</' QName S? '>'
7917 */
7918
7919void
7920xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007921 xmlParseEndTag1(ctxt, 0);
7922}
Daniel Veillard81273902003-09-30 00:43:48 +00007923#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007924
7925/************************************************************************
7926 * *
7927 * SAX 2 specific operations *
7928 * *
7929 ************************************************************************/
7930
Daniel Veillard0fb18932003-09-07 09:14:37 +00007931/*
7932 * xmlGetNamespace:
7933 * @ctxt: an XML parser context
7934 * @prefix: the prefix to lookup
7935 *
7936 * Lookup the namespace name for the @prefix (which ca be NULL)
7937 * The prefix must come from the @ctxt->dict dictionnary
7938 *
7939 * Returns the namespace name or NULL if not bound
7940 */
7941static const xmlChar *
7942xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7943 int i;
7944
Daniel Veillarde57ec792003-09-10 10:50:59 +00007945 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007946 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007947 if (ctxt->nsTab[i] == prefix) {
7948 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7949 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007951 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007952 return(NULL);
7953}
7954
7955/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007956 * xmlParseQName:
7957 * @ctxt: an XML parser context
7958 * @prefix: pointer to store the prefix part
7959 *
7960 * parse an XML Namespace QName
7961 *
7962 * [6] QName ::= (Prefix ':')? LocalPart
7963 * [7] Prefix ::= NCName
7964 * [8] LocalPart ::= NCName
7965 *
7966 * Returns the Name parsed or NULL
7967 */
7968
7969static const xmlChar *
7970xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7971 const xmlChar *l, *p;
7972
7973 GROW;
7974
7975 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007976 if (l == NULL) {
7977 if (CUR == ':') {
7978 l = xmlParseName(ctxt);
7979 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007980 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7981 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007982 *prefix = NULL;
7983 return(l);
7984 }
7985 }
7986 return(NULL);
7987 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007988 if (CUR == ':') {
7989 NEXT;
7990 p = l;
7991 l = xmlParseNCName(ctxt);
7992 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007993 xmlChar *tmp;
7994
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007995 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7996 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007997 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7998 p = xmlDictLookup(ctxt->dict, tmp, -1);
7999 if (tmp != NULL) xmlFree(tmp);
8000 *prefix = NULL;
8001 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008002 }
8003 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008004 xmlChar *tmp;
8005
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008006 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8007 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008008 NEXT;
8009 tmp = (xmlChar *) xmlParseName(ctxt);
8010 if (tmp != NULL) {
8011 tmp = xmlBuildQName(tmp, l, NULL, 0);
8012 l = xmlDictLookup(ctxt->dict, tmp, -1);
8013 if (tmp != NULL) xmlFree(tmp);
8014 *prefix = p;
8015 return(l);
8016 }
8017 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8018 l = xmlDictLookup(ctxt->dict, tmp, -1);
8019 if (tmp != NULL) xmlFree(tmp);
8020 *prefix = p;
8021 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008022 }
8023 *prefix = p;
8024 } else
8025 *prefix = NULL;
8026 return(l);
8027}
8028
8029/**
8030 * xmlParseQNameAndCompare:
8031 * @ctxt: an XML parser context
8032 * @name: the localname
8033 * @prefix: the prefix, if any.
8034 *
8035 * parse an XML name and compares for match
8036 * (specialized for endtag parsing)
8037 *
8038 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8039 * and the name for mismatch
8040 */
8041
8042static const xmlChar *
8043xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8044 xmlChar const *prefix) {
8045 const xmlChar *cmp = name;
8046 const xmlChar *in;
8047 const xmlChar *ret;
8048 const xmlChar *prefix2;
8049
8050 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8051
8052 GROW;
8053 in = ctxt->input->cur;
8054
8055 cmp = prefix;
8056 while (*in != 0 && *in == *cmp) {
8057 ++in;
8058 ++cmp;
8059 }
8060 if ((*cmp == 0) && (*in == ':')) {
8061 in++;
8062 cmp = name;
8063 while (*in != 0 && *in == *cmp) {
8064 ++in;
8065 ++cmp;
8066 }
William M. Brack76e95df2003-10-18 16:20:14 +00008067 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008068 /* success */
8069 ctxt->input->cur = in;
8070 return((const xmlChar*) 1);
8071 }
8072 }
8073 /*
8074 * all strings coms from the dictionary, equality can be done directly
8075 */
8076 ret = xmlParseQName (ctxt, &prefix2);
8077 if ((ret == name) && (prefix == prefix2))
8078 return((const xmlChar*) 1);
8079 return ret;
8080}
8081
8082/**
8083 * xmlParseAttValueInternal:
8084 * @ctxt: an XML parser context
8085 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008086 * @alloc: whether the attribute was reallocated as a new string
8087 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008088 *
8089 * parse a value for an attribute.
8090 * NOTE: if no normalization is needed, the routine will return pointers
8091 * directly from the data buffer.
8092 *
8093 * 3.3.3 Attribute-Value Normalization:
8094 * Before the value of an attribute is passed to the application or
8095 * checked for validity, the XML processor must normalize it as follows:
8096 * - a character reference is processed by appending the referenced
8097 * character to the attribute value
8098 * - an entity reference is processed by recursively processing the
8099 * replacement text of the entity
8100 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8101 * appending #x20 to the normalized value, except that only a single
8102 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8103 * parsed entity or the literal entity value of an internal parsed entity
8104 * - other characters are processed by appending them to the normalized value
8105 * If the declared value is not CDATA, then the XML processor must further
8106 * process the normalized attribute value by discarding any leading and
8107 * trailing space (#x20) characters, and by replacing sequences of space
8108 * (#x20) characters by a single space (#x20) character.
8109 * All attributes for which no declaration has been read should be treated
8110 * by a non-validating parser as if declared CDATA.
8111 *
8112 * Returns the AttValue parsed or NULL. The value has to be freed by the
8113 * caller if it was copied, this can be detected by val[*len] == 0.
8114 */
8115
8116static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008117xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8118 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008119{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008120 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008121 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008122 xmlChar *ret = NULL;
8123
8124 GROW;
8125 in = (xmlChar *) CUR_PTR;
8126 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008127 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008128 return (NULL);
8129 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008130 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008131
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008132 /*
8133 * try to handle in this routine the most common case where no
8134 * allocation of a new string is required and where content is
8135 * pure ASCII.
8136 */
8137 limit = *in++;
8138 end = ctxt->input->end;
8139 start = in;
8140 if (in >= end) {
8141 const xmlChar *oldbase = ctxt->input->base;
8142 GROW;
8143 if (oldbase != ctxt->input->base) {
8144 long delta = ctxt->input->base - oldbase;
8145 start = start + delta;
8146 in = in + delta;
8147 }
8148 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008149 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008150 if (normalize) {
8151 /*
8152 * Skip any leading spaces
8153 */
8154 while ((in < end) && (*in != limit) &&
8155 ((*in == 0x20) || (*in == 0x9) ||
8156 (*in == 0xA) || (*in == 0xD))) {
8157 in++;
8158 start = in;
8159 if (in >= end) {
8160 const xmlChar *oldbase = ctxt->input->base;
8161 GROW;
8162 if (oldbase != ctxt->input->base) {
8163 long delta = ctxt->input->base - oldbase;
8164 start = start + delta;
8165 in = in + delta;
8166 }
8167 end = ctxt->input->end;
8168 }
8169 }
8170 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8171 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8172 if ((*in++ == 0x20) && (*in == 0x20)) break;
8173 if (in >= end) {
8174 const xmlChar *oldbase = ctxt->input->base;
8175 GROW;
8176 if (oldbase != ctxt->input->base) {
8177 long delta = ctxt->input->base - oldbase;
8178 start = start + delta;
8179 in = in + delta;
8180 }
8181 end = ctxt->input->end;
8182 }
8183 }
8184 last = in;
8185 /*
8186 * skip the trailing blanks
8187 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008188 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008189 while ((in < end) && (*in != limit) &&
8190 ((*in == 0x20) || (*in == 0x9) ||
8191 (*in == 0xA) || (*in == 0xD))) {
8192 in++;
8193 if (in >= end) {
8194 const xmlChar *oldbase = ctxt->input->base;
8195 GROW;
8196 if (oldbase != ctxt->input->base) {
8197 long delta = ctxt->input->base - oldbase;
8198 start = start + delta;
8199 in = in + delta;
8200 last = last + delta;
8201 }
8202 end = ctxt->input->end;
8203 }
8204 }
8205 if (*in != limit) goto need_complex;
8206 } else {
8207 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8208 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8209 in++;
8210 if (in >= end) {
8211 const xmlChar *oldbase = ctxt->input->base;
8212 GROW;
8213 if (oldbase != ctxt->input->base) {
8214 long delta = ctxt->input->base - oldbase;
8215 start = start + delta;
8216 in = in + delta;
8217 }
8218 end = ctxt->input->end;
8219 }
8220 }
8221 last = in;
8222 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008223 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008224 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008225 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008226 *len = last - start;
8227 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008228 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008229 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008230 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008231 }
8232 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008233 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008235need_complex:
8236 if (alloc) *alloc = 1;
8237 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008238}
8239
8240/**
8241 * xmlParseAttribute2:
8242 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008243 * @pref: the element prefix
8244 * @elem: the element name
8245 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008247 * @len: an int * to save the length of the attribute
8248 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008249 *
8250 * parse an attribute in the new SAX2 framework.
8251 *
8252 * Returns the attribute name, and the value in *value, .
8253 */
8254
8255static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008256xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008257 const xmlChar * pref, const xmlChar * elem,
8258 const xmlChar ** prefix, xmlChar ** value,
8259 int *len, int *alloc)
8260{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008261 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008262 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008263 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264
8265 *value = NULL;
8266 GROW;
8267 name = xmlParseQName(ctxt, prefix);
8268 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8270 "error parsing attribute name\n");
8271 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008272 }
8273
8274 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008275 * get the type if needed
8276 */
8277 if (ctxt->attsSpecial != NULL) {
8278 int type;
8279
8280 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008281 pref, elem, *prefix, name);
8282 if (type != 0)
8283 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008284 }
8285
8286 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008287 * read the value
8288 */
8289 SKIP_BLANKS;
8290 if (RAW == '=') {
8291 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008292 SKIP_BLANKS;
8293 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8294 if (normalize) {
8295 /*
8296 * Sometimes a second normalisation pass for spaces is needed
8297 * but that only happens if charrefs or entities refernces
8298 * have been used in the attribute value, i.e. the attribute
8299 * value have been extracted in an allocated string already.
8300 */
8301 if (*alloc) {
8302 const xmlChar *val2;
8303
8304 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8305 if (val2 != NULL) {
8306 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008307 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008308 }
8309 }
8310 }
8311 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008312 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008313 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8314 "Specification mandate value for attribute %s\n",
8315 name);
8316 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008317 }
8318
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008319 if (*prefix == ctxt->str_xml) {
8320 /*
8321 * Check that xml:lang conforms to the specification
8322 * No more registered as an error, just generate a warning now
8323 * since this was deprecated in XML second edition
8324 */
8325 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8326 internal_val = xmlStrndup(val, *len);
8327 if (!xmlCheckLanguageID(internal_val)) {
8328 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8329 "Malformed value for xml:lang : %s\n",
8330 internal_val, NULL);
8331 }
8332 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008333
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008334 /*
8335 * Check that xml:space conforms to the specification
8336 */
8337 if (xmlStrEqual(name, BAD_CAST "space")) {
8338 internal_val = xmlStrndup(val, *len);
8339 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8340 *(ctxt->space) = 0;
8341 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8342 *(ctxt->space) = 1;
8343 else {
8344 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8345 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8346 internal_val, NULL);
8347 }
8348 }
8349 if (internal_val) {
8350 xmlFree(internal_val);
8351 }
8352 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008353
8354 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008355 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008356}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008357/**
8358 * xmlParseStartTag2:
8359 * @ctxt: an XML parser context
8360 *
8361 * parse a start of tag either for rule element or
8362 * EmptyElement. In both case we don't parse the tag closing chars.
8363 * This routine is called when running SAX2 parsing
8364 *
8365 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8366 *
8367 * [ WFC: Unique Att Spec ]
8368 * No attribute name may appear more than once in the same start-tag or
8369 * empty-element tag.
8370 *
8371 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8372 *
8373 * [ WFC: Unique Att Spec ]
8374 * No attribute name may appear more than once in the same start-tag or
8375 * empty-element tag.
8376 *
8377 * With namespace:
8378 *
8379 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8380 *
8381 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8382 *
8383 * Returns the element name parsed
8384 */
8385
8386static const xmlChar *
8387xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008388 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008389 const xmlChar *localname;
8390 const xmlChar *prefix;
8391 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008392 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008393 const xmlChar *nsname;
8394 xmlChar *attvalue;
8395 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008396 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008397 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008398 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008399 const xmlChar *base;
8400 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008401 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008402
8403 if (RAW != '<') return(NULL);
8404 NEXT1;
8405
8406 /*
8407 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8408 * point since the attribute values may be stored as pointers to
8409 * the buffer and calling SHRINK would destroy them !
8410 * The Shrinking is only possible once the full set of attribute
8411 * callbacks have been done.
8412 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008413reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008414 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008415 base = ctxt->input->base;
8416 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008417 oldline = ctxt->input->line;
8418 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008419 nbatts = 0;
8420 nratts = 0;
8421 nbdef = 0;
8422 nbNs = 0;
8423 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008424 /* Forget any namespaces added during an earlier parse of this element. */
8425 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008426
8427 localname = xmlParseQName(ctxt, &prefix);
8428 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008429 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8430 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008431 return(NULL);
8432 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008433 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008434
8435 /*
8436 * Now parse the attributes, it ends up with the ending
8437 *
8438 * (S Attribute)* S?
8439 */
8440 SKIP_BLANKS;
8441 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008442 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008443
8444 while ((RAW != '>') &&
8445 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008446 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008447 const xmlChar *q = CUR_PTR;
8448 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008449 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008450
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008451 attname = xmlParseAttribute2(ctxt, prefix, localname,
8452 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008453 if (ctxt->input->base != base) {
8454 if ((attvalue != NULL) && (alloc != 0))
8455 xmlFree(attvalue);
8456 attvalue = NULL;
8457 goto base_changed;
8458 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 if ((attname != NULL) && (attvalue != NULL)) {
8460 if (len < 0) len = xmlStrlen(attvalue);
8461 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008462 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8463 xmlURIPtr uri;
8464
8465 if (*URL != 0) {
8466 uri = xmlParseURI((const char *) URL);
8467 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008468 xmlNsErr(ctxt, XML_WAR_NS_URI,
8469 "xmlns: '%s' is not a valid URI\n",
8470 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008471 } else {
Daniel Veillard37334572008-07-31 08:20:02 +00008472 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8473 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8474 "xmlns: URI %s is not absolute\n",
8475 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008476 }
8477 xmlFreeURI(uri);
8478 }
Daniel Veillard37334572008-07-31 08:20:02 +00008479 if (URL == ctxt->str_xml_ns) {
8480 if (attname != ctxt->str_xml) {
8481 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8482 "xml namespace URI cannot be the default namespace\n",
8483 NULL, NULL, NULL);
8484 }
8485 goto skip_default_ns;
8486 }
8487 if ((len == 29) &&
8488 (xmlStrEqual(URL,
8489 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8490 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8491 "reuse of the xmlns namespace name is forbidden\n",
8492 NULL, NULL, NULL);
8493 goto skip_default_ns;
8494 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008495 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008496 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008497 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008498 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008499 for (j = 1;j <= nbNs;j++)
8500 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8501 break;
8502 if (j <= nbNs)
8503 xmlErrAttributeDup(ctxt, NULL, attname);
8504 else
8505 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008506skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008507 if (alloc != 0) xmlFree(attvalue);
8508 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008509 continue;
8510 }
8511 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008512 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8513 xmlURIPtr uri;
8514
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008515 if (attname == ctxt->str_xml) {
8516 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008517 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8518 "xml namespace prefix mapped to wrong URI\n",
8519 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008520 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008521 /*
8522 * Do not keep a namespace definition node
8523 */
Daniel Veillard37334572008-07-31 08:20:02 +00008524 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008525 }
Daniel Veillard37334572008-07-31 08:20:02 +00008526 if (URL == ctxt->str_xml_ns) {
8527 if (attname != ctxt->str_xml) {
8528 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8529 "xml namespace URI mapped to wrong prefix\n",
8530 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008531 }
Daniel Veillard37334572008-07-31 08:20:02 +00008532 goto skip_ns;
8533 }
8534 if (attname == ctxt->str_xmlns) {
8535 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8536 "redefinition of the xmlns prefix is forbidden\n",
8537 NULL, NULL, NULL);
8538 goto skip_ns;
8539 }
8540 if ((len == 29) &&
8541 (xmlStrEqual(URL,
8542 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8543 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8544 "reuse of the xmlns namespace name is forbidden\n",
8545 NULL, NULL, NULL);
8546 goto skip_ns;
8547 }
8548 if ((URL == NULL) || (URL[0] == 0)) {
8549 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8550 "xmlns:%s: Empty XML namespace is not allowed\n",
8551 attname, NULL, NULL);
8552 goto skip_ns;
8553 } else {
8554 uri = xmlParseURI((const char *) URL);
8555 if (uri == NULL) {
8556 xmlNsErr(ctxt, XML_WAR_NS_URI,
8557 "xmlns:%s: '%s' is not a valid URI\n",
8558 attname, URL, NULL);
8559 } else {
8560 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8561 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8562 "xmlns:%s: URI %s is not absolute\n",
8563 attname, URL, NULL);
8564 }
8565 xmlFreeURI(uri);
8566 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008567 }
8568
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008570 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008571 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008572 for (j = 1;j <= nbNs;j++)
8573 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8574 break;
8575 if (j <= nbNs)
8576 xmlErrAttributeDup(ctxt, aprefix, attname);
8577 else
8578 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008579skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008580 if (alloc != 0) xmlFree(attvalue);
8581 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008582 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008583 continue;
8584 }
8585
8586 /*
8587 * Add the pair to atts
8588 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008589 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8590 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008591 if (attvalue[len] == 0)
8592 xmlFree(attvalue);
8593 goto failed;
8594 }
8595 maxatts = ctxt->maxatts;
8596 atts = ctxt->atts;
8597 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008598 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008599 atts[nbatts++] = attname;
8600 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008601 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008602 atts[nbatts++] = attvalue;
8603 attvalue += len;
8604 atts[nbatts++] = attvalue;
8605 /*
8606 * tag if some deallocation is needed
8607 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008608 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008609 } else {
8610 if ((attvalue != NULL) && (attvalue[len] == 0))
8611 xmlFree(attvalue);
8612 }
8613
Daniel Veillard37334572008-07-31 08:20:02 +00008614failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008615
8616 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008617 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8619 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008620 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008621 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8622 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008623 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008624 }
8625 SKIP_BLANKS;
8626 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8627 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008628 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008629 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008630 break;
8631 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008632 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008633 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008634 }
8635
Daniel Veillard0fb18932003-09-07 09:14:37 +00008636 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008637 * The attributes defaulting
8638 */
8639 if (ctxt->attsDefault != NULL) {
8640 xmlDefAttrsPtr defaults;
8641
8642 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8643 if (defaults != NULL) {
8644 for (i = 0;i < defaults->nbAttrs;i++) {
8645 attname = defaults->values[4 * i];
8646 aprefix = defaults->values[4 * i + 1];
8647
8648 /*
8649 * special work for namespaces defaulted defs
8650 */
8651 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8652 /*
8653 * check that it's not a defined namespace
8654 */
8655 for (j = 1;j <= nbNs;j++)
8656 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8657 break;
8658 if (j <= nbNs) continue;
8659
8660 nsname = xmlGetNamespace(ctxt, NULL);
8661 if (nsname != defaults->values[4 * i + 2]) {
8662 if (nsPush(ctxt, NULL,
8663 defaults->values[4 * i + 2]) > 0)
8664 nbNs++;
8665 }
8666 } else if (aprefix == ctxt->str_xmlns) {
8667 /*
8668 * check that it's not a defined namespace
8669 */
8670 for (j = 1;j <= nbNs;j++)
8671 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8672 break;
8673 if (j <= nbNs) continue;
8674
8675 nsname = xmlGetNamespace(ctxt, attname);
8676 if (nsname != defaults->values[2]) {
8677 if (nsPush(ctxt, attname,
8678 defaults->values[4 * i + 2]) > 0)
8679 nbNs++;
8680 }
8681 } else {
8682 /*
8683 * check that it's not a defined attribute
8684 */
8685 for (j = 0;j < nbatts;j+=5) {
8686 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8687 break;
8688 }
8689 if (j < nbatts) continue;
8690
8691 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8692 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008693 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008694 }
8695 maxatts = ctxt->maxatts;
8696 atts = ctxt->atts;
8697 }
8698 atts[nbatts++] = attname;
8699 atts[nbatts++] = aprefix;
8700 if (aprefix == NULL)
8701 atts[nbatts++] = NULL;
8702 else
8703 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8704 atts[nbatts++] = defaults->values[4 * i + 2];
8705 atts[nbatts++] = defaults->values[4 * i + 3];
8706 nbdef++;
8707 }
8708 }
8709 }
8710 }
8711
Daniel Veillarde70c8772003-11-25 07:21:18 +00008712 /*
8713 * The attributes checkings
8714 */
8715 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008716 /*
8717 * The default namespace does not apply to attribute names.
8718 */
8719 if (atts[i + 1] != NULL) {
8720 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8721 if (nsname == NULL) {
8722 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8723 "Namespace prefix %s for %s on %s is not defined\n",
8724 atts[i + 1], atts[i], localname);
8725 }
8726 atts[i + 2] = nsname;
8727 } else
8728 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008729 /*
8730 * [ WFC: Unique Att Spec ]
8731 * No attribute name may appear more than once in the same
8732 * start-tag or empty-element tag.
8733 * As extended by the Namespace in XML REC.
8734 */
8735 for (j = 0; j < i;j += 5) {
8736 if (atts[i] == atts[j]) {
8737 if (atts[i+1] == atts[j+1]) {
8738 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8739 break;
8740 }
8741 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8742 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8743 "Namespaced Attribute %s in '%s' redefined\n",
8744 atts[i], nsname, NULL);
8745 break;
8746 }
8747 }
8748 }
8749 }
8750
Daniel Veillarde57ec792003-09-10 10:50:59 +00008751 nsname = xmlGetNamespace(ctxt, prefix);
8752 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008753 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8754 "Namespace prefix %s on %s is not defined\n",
8755 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008756 }
8757 *pref = prefix;
8758 *URI = nsname;
8759
8760 /*
8761 * SAX: Start of Element !
8762 */
8763 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8764 (!ctxt->disableSAX)) {
8765 if (nbNs > 0)
8766 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8767 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8768 nbatts / 5, nbdef, atts);
8769 else
8770 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8771 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8772 }
8773
8774 /*
8775 * Free up attribute allocated strings if needed
8776 */
8777 if (attval != 0) {
8778 for (i = 3,j = 0; j < nratts;i += 5,j++)
8779 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8780 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008781 }
8782
8783 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008784
8785base_changed:
8786 /*
8787 * the attribute strings are valid iif the base didn't changed
8788 */
8789 if (attval != 0) {
8790 for (i = 3,j = 0; j < nratts;i += 5,j++)
8791 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8792 xmlFree((xmlChar *) atts[i]);
8793 }
8794 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008795 ctxt->input->line = oldline;
8796 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008797 if (ctxt->wellFormed == 1) {
8798 goto reparse;
8799 }
8800 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008801}
8802
8803/**
8804 * xmlParseEndTag2:
8805 * @ctxt: an XML parser context
8806 * @line: line of the start tag
8807 * @nsNr: number of namespaces on the start tag
8808 *
8809 * parse an end of tag
8810 *
8811 * [42] ETag ::= '</' Name S? '>'
8812 *
8813 * With namespace
8814 *
8815 * [NS 9] ETag ::= '</' QName S? '>'
8816 */
8817
8818static void
8819xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008820 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008821 const xmlChar *name;
8822
8823 GROW;
8824 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008825 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008826 return;
8827 }
8828 SKIP(2);
8829
William M. Brack13dfa872004-09-18 04:52:08 +00008830 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008831 if (ctxt->input->cur[tlen] == '>') {
8832 ctxt->input->cur += tlen + 1;
8833 goto done;
8834 }
8835 ctxt->input->cur += tlen;
8836 name = (xmlChar*)1;
8837 } else {
8838 if (prefix == NULL)
8839 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8840 else
8841 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8842 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008843
8844 /*
8845 * We should definitely be at the ending "S? '>'" part
8846 */
8847 GROW;
8848 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008849 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008850 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851 } else
8852 NEXT1;
8853
8854 /*
8855 * [ WFC: Element Type Match ]
8856 * The Name in an element's end-tag must match the element type in the
8857 * start-tag.
8858 *
8859 */
8860 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008861 if (name == NULL) name = BAD_CAST "unparseable";
8862 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008863 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008864 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 }
8866
8867 /*
8868 * SAX: End of Tag
8869 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008870done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008871 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8872 (!ctxt->disableSAX))
8873 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8874
Daniel Veillard0fb18932003-09-07 09:14:37 +00008875 spacePop(ctxt);
8876 if (nsNr != 0)
8877 nsPop(ctxt, nsNr);
8878 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008879}
8880
8881/**
Owen Taylor3473f882001-02-23 17:55:21 +00008882 * xmlParseCDSect:
8883 * @ctxt: an XML parser context
8884 *
8885 * Parse escaped pure raw content.
8886 *
8887 * [18] CDSect ::= CDStart CData CDEnd
8888 *
8889 * [19] CDStart ::= '<![CDATA['
8890 *
8891 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8892 *
8893 * [21] CDEnd ::= ']]>'
8894 */
8895void
8896xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8897 xmlChar *buf = NULL;
8898 int len = 0;
8899 int size = XML_PARSER_BUFFER_SIZE;
8900 int r, rl;
8901 int s, sl;
8902 int cur, l;
8903 int count = 0;
8904
Daniel Veillard8f597c32003-10-06 08:19:27 +00008905 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008906 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008907 SKIP(9);
8908 } else
8909 return;
8910
8911 ctxt->instate = XML_PARSER_CDATA_SECTION;
8912 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008913 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008914 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008915 ctxt->instate = XML_PARSER_CONTENT;
8916 return;
8917 }
8918 NEXTL(rl);
8919 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008920 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008921 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008922 ctxt->instate = XML_PARSER_CONTENT;
8923 return;
8924 }
8925 NEXTL(sl);
8926 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008927 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008928 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008929 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008930 return;
8931 }
William M. Brack871611b2003-10-18 04:53:14 +00008932 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008933 ((r != ']') || (s != ']') || (cur != '>'))) {
8934 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008935 xmlChar *tmp;
8936
Owen Taylor3473f882001-02-23 17:55:21 +00008937 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008938 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8939 if (tmp == NULL) {
8940 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008941 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008942 return;
8943 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008944 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008945 }
8946 COPY_BUF(rl,buf,len,r);
8947 r = s;
8948 rl = sl;
8949 s = cur;
8950 sl = l;
8951 count++;
8952 if (count > 50) {
8953 GROW;
8954 count = 0;
8955 }
8956 NEXTL(l);
8957 cur = CUR_CHAR(l);
8958 }
8959 buf[len] = 0;
8960 ctxt->instate = XML_PARSER_CONTENT;
8961 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008962 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008963 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008964 xmlFree(buf);
8965 return;
8966 }
8967 NEXTL(l);
8968
8969 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008970 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008971 */
8972 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8973 if (ctxt->sax->cdataBlock != NULL)
8974 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008975 else if (ctxt->sax->characters != NULL)
8976 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008977 }
8978 xmlFree(buf);
8979}
8980
8981/**
8982 * xmlParseContent:
8983 * @ctxt: an XML parser context
8984 *
8985 * Parse a content:
8986 *
8987 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8988 */
8989
8990void
8991xmlParseContent(xmlParserCtxtPtr ctxt) {
8992 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008993 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008994 ((RAW != '<') || (NXT(1) != '/')) &&
8995 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008996 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008997 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008998 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008999
9000 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009001 * First case : a Processing Instruction.
9002 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009003 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009004 xmlParsePI(ctxt);
9005 }
9006
9007 /*
9008 * Second case : a CDSection
9009 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009010 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009011 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009012 xmlParseCDSect(ctxt);
9013 }
9014
9015 /*
9016 * Third case : a comment
9017 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009018 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009019 (NXT(2) == '-') && (NXT(3) == '-')) {
9020 xmlParseComment(ctxt);
9021 ctxt->instate = XML_PARSER_CONTENT;
9022 }
9023
9024 /*
9025 * Fourth case : a sub-element.
9026 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009027 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009028 xmlParseElement(ctxt);
9029 }
9030
9031 /*
9032 * Fifth case : a reference. If if has not been resolved,
9033 * parsing returns it's Name, create the node
9034 */
9035
Daniel Veillard21a0f912001-02-25 19:54:14 +00009036 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009037 xmlParseReference(ctxt);
9038 }
9039
9040 /*
9041 * Last case, text. Note that References are handled directly.
9042 */
9043 else {
9044 xmlParseCharData(ctxt, 0);
9045 }
9046
9047 GROW;
9048 /*
9049 * Pop-up of finished entities.
9050 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009051 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009052 xmlPopInput(ctxt);
9053 SHRINK;
9054
Daniel Veillardfdc91562002-07-01 21:52:03 +00009055 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009056 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9057 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009058 ctxt->instate = XML_PARSER_EOF;
9059 break;
9060 }
9061 }
9062}
9063
9064/**
9065 * xmlParseElement:
9066 * @ctxt: an XML parser context
9067 *
9068 * parse an XML element, this is highly recursive
9069 *
9070 * [39] element ::= EmptyElemTag | STag content ETag
9071 *
9072 * [ WFC: Element Type Match ]
9073 * The Name in an element's end-tag must match the element type in the
9074 * start-tag.
9075 *
Owen Taylor3473f882001-02-23 17:55:21 +00009076 */
9077
9078void
9079xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009080 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009081 const xmlChar *prefix;
9082 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009083 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009084 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009085 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009086 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009087
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009088 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
9089 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9090 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
9091 xmlParserMaxDepth);
9092 ctxt->instate = XML_PARSER_EOF;
9093 return;
9094 }
9095
Owen Taylor3473f882001-02-23 17:55:21 +00009096 /* Capture start position */
9097 if (ctxt->record_info) {
9098 node_info.begin_pos = ctxt->input->consumed +
9099 (CUR_PTR - ctxt->input->base);
9100 node_info.begin_line = ctxt->input->line;
9101 }
9102
9103 if (ctxt->spaceNr == 0)
9104 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009105 else if (*ctxt->space == -2)
9106 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009107 else
9108 spacePush(ctxt, *ctxt->space);
9109
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009110 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009111#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009112 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009113#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009114 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009115#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009116 else
9117 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009118#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009119 if (name == NULL) {
9120 spacePop(ctxt);
9121 return;
9122 }
9123 namePush(ctxt, name);
9124 ret = ctxt->node;
9125
Daniel Veillard4432df22003-09-28 18:58:27 +00009126#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009127 /*
9128 * [ VC: Root Element Type ]
9129 * The Name in the document type declaration must match the element
9130 * type of the root element.
9131 */
9132 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9133 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9134 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009135#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009136
9137 /*
9138 * Check for an Empty Element.
9139 */
9140 if ((RAW == '/') && (NXT(1) == '>')) {
9141 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009142 if (ctxt->sax2) {
9143 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9144 (!ctxt->disableSAX))
9145 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009146#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009147 } else {
9148 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9149 (!ctxt->disableSAX))
9150 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009151#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009152 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009153 namePop(ctxt);
9154 spacePop(ctxt);
9155 if (nsNr != ctxt->nsNr)
9156 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009157 if ( ret != NULL && ctxt->record_info ) {
9158 node_info.end_pos = ctxt->input->consumed +
9159 (CUR_PTR - ctxt->input->base);
9160 node_info.end_line = ctxt->input->line;
9161 node_info.node = ret;
9162 xmlParserAddNodeInfo(ctxt, &node_info);
9163 }
9164 return;
9165 }
9166 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009167 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009168 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009169 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9170 "Couldn't find end of Start Tag %s line %d\n",
9171 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009172
9173 /*
9174 * end of parsing of this node.
9175 */
9176 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009177 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009178 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009179 if (nsNr != ctxt->nsNr)
9180 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009181
9182 /*
9183 * Capture end position and add node
9184 */
9185 if ( ret != NULL && ctxt->record_info ) {
9186 node_info.end_pos = ctxt->input->consumed +
9187 (CUR_PTR - ctxt->input->base);
9188 node_info.end_line = ctxt->input->line;
9189 node_info.node = ret;
9190 xmlParserAddNodeInfo(ctxt, &node_info);
9191 }
9192 return;
9193 }
9194
9195 /*
9196 * Parse the content of the element:
9197 */
9198 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009199 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009200 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009201 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009202 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009203
9204 /*
9205 * end of parsing of this node.
9206 */
9207 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009208 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009209 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009210 if (nsNr != ctxt->nsNr)
9211 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009212 return;
9213 }
9214
9215 /*
9216 * parse the end of tag: '</' should be here.
9217 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009218 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009219 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009220 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009221 }
9222#ifdef LIBXML_SAX1_ENABLED
9223 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009224 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009225#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009226
9227 /*
9228 * Capture end position and add node
9229 */
9230 if ( ret != NULL && ctxt->record_info ) {
9231 node_info.end_pos = ctxt->input->consumed +
9232 (CUR_PTR - ctxt->input->base);
9233 node_info.end_line = ctxt->input->line;
9234 node_info.node = ret;
9235 xmlParserAddNodeInfo(ctxt, &node_info);
9236 }
9237}
9238
9239/**
9240 * xmlParseVersionNum:
9241 * @ctxt: an XML parser context
9242 *
9243 * parse the XML version value.
9244 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009245 * [26] VersionNum ::= '1.' [0-9]+
9246 *
9247 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009248 *
9249 * Returns the string giving the XML version number, or NULL
9250 */
9251xmlChar *
9252xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9253 xmlChar *buf = NULL;
9254 int len = 0;
9255 int size = 10;
9256 xmlChar cur;
9257
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009258 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009259 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009260 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009261 return(NULL);
9262 }
9263 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009264 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009265 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009266 return(NULL);
9267 }
9268 buf[len++] = cur;
9269 NEXT;
9270 cur=CUR;
9271 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009272 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009273 return(NULL);
9274 }
9275 buf[len++] = cur;
9276 NEXT;
9277 cur=CUR;
9278 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009279 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009280 xmlChar *tmp;
9281
Owen Taylor3473f882001-02-23 17:55:21 +00009282 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009283 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9284 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009285 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009286 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009287 return(NULL);
9288 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009289 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009290 }
9291 buf[len++] = cur;
9292 NEXT;
9293 cur=CUR;
9294 }
9295 buf[len] = 0;
9296 return(buf);
9297}
9298
9299/**
9300 * xmlParseVersionInfo:
9301 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009302 *
Owen Taylor3473f882001-02-23 17:55:21 +00009303 * parse the XML version.
9304 *
9305 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009306 *
Owen Taylor3473f882001-02-23 17:55:21 +00009307 * [25] Eq ::= S? '=' S?
9308 *
9309 * Returns the version string, e.g. "1.0"
9310 */
9311
9312xmlChar *
9313xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9314 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009315
Daniel Veillarda07050d2003-10-19 14:46:32 +00009316 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009317 SKIP(7);
9318 SKIP_BLANKS;
9319 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009320 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009321 return(NULL);
9322 }
9323 NEXT;
9324 SKIP_BLANKS;
9325 if (RAW == '"') {
9326 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009327 version = xmlParseVersionNum(ctxt);
9328 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009329 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009330 } else
9331 NEXT;
9332 } else if (RAW == '\''){
9333 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009334 version = xmlParseVersionNum(ctxt);
9335 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009336 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009337 } else
9338 NEXT;
9339 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009340 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009341 }
9342 }
9343 return(version);
9344}
9345
9346/**
9347 * xmlParseEncName:
9348 * @ctxt: an XML parser context
9349 *
9350 * parse the XML encoding name
9351 *
9352 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9353 *
9354 * Returns the encoding name value or NULL
9355 */
9356xmlChar *
9357xmlParseEncName(xmlParserCtxtPtr ctxt) {
9358 xmlChar *buf = NULL;
9359 int len = 0;
9360 int size = 10;
9361 xmlChar cur;
9362
9363 cur = CUR;
9364 if (((cur >= 'a') && (cur <= 'z')) ||
9365 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009366 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009367 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009368 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009369 return(NULL);
9370 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009371
Owen Taylor3473f882001-02-23 17:55:21 +00009372 buf[len++] = cur;
9373 NEXT;
9374 cur = CUR;
9375 while (((cur >= 'a') && (cur <= 'z')) ||
9376 ((cur >= 'A') && (cur <= 'Z')) ||
9377 ((cur >= '0') && (cur <= '9')) ||
9378 (cur == '.') || (cur == '_') ||
9379 (cur == '-')) {
9380 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009381 xmlChar *tmp;
9382
Owen Taylor3473f882001-02-23 17:55:21 +00009383 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009384 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9385 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009386 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009387 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009388 return(NULL);
9389 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009390 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009391 }
9392 buf[len++] = cur;
9393 NEXT;
9394 cur = CUR;
9395 if (cur == 0) {
9396 SHRINK;
9397 GROW;
9398 cur = CUR;
9399 }
9400 }
9401 buf[len] = 0;
9402 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009403 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009404 }
9405 return(buf);
9406}
9407
9408/**
9409 * xmlParseEncodingDecl:
9410 * @ctxt: an XML parser context
9411 *
9412 * parse the XML encoding declaration
9413 *
9414 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9415 *
9416 * this setups the conversion filters.
9417 *
9418 * Returns the encoding value or NULL
9419 */
9420
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009421const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009422xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9423 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009424
9425 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009426 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009427 SKIP(8);
9428 SKIP_BLANKS;
9429 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009430 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009431 return(NULL);
9432 }
9433 NEXT;
9434 SKIP_BLANKS;
9435 if (RAW == '"') {
9436 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009437 encoding = xmlParseEncName(ctxt);
9438 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009439 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009440 } else
9441 NEXT;
9442 } else if (RAW == '\''){
9443 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009444 encoding = xmlParseEncName(ctxt);
9445 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009446 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009447 } else
9448 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009449 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009450 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009451 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009452 /*
9453 * UTF-16 encoding stwich has already taken place at this stage,
9454 * more over the little-endian/big-endian selection is already done
9455 */
9456 if ((encoding != NULL) &&
9457 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9458 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009459 /*
9460 * If no encoding was passed to the parser, that we are
9461 * using UTF-16 and no decoder is present i.e. the
9462 * document is apparently UTF-8 compatible, then raise an
9463 * encoding mismatch fatal error
9464 */
9465 if ((ctxt->encoding == NULL) &&
9466 (ctxt->input->buf != NULL) &&
9467 (ctxt->input->buf->encoder == NULL)) {
9468 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9469 "Document labelled UTF-16 but has UTF-8 content\n");
9470 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009471 if (ctxt->encoding != NULL)
9472 xmlFree((xmlChar *) ctxt->encoding);
9473 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009474 }
9475 /*
9476 * UTF-8 encoding is handled natively
9477 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009478 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009479 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9480 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009481 if (ctxt->encoding != NULL)
9482 xmlFree((xmlChar *) ctxt->encoding);
9483 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009484 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009485 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009486 xmlCharEncodingHandlerPtr handler;
9487
9488 if (ctxt->input->encoding != NULL)
9489 xmlFree((xmlChar *) ctxt->input->encoding);
9490 ctxt->input->encoding = encoding;
9491
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009492 handler = xmlFindCharEncodingHandler((const char *) encoding);
9493 if (handler != NULL) {
9494 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009495 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009496 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009497 "Unsupported encoding %s\n", encoding);
9498 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009499 }
9500 }
9501 }
9502 return(encoding);
9503}
9504
9505/**
9506 * xmlParseSDDecl:
9507 * @ctxt: an XML parser context
9508 *
9509 * parse the XML standalone declaration
9510 *
9511 * [32] SDDecl ::= S 'standalone' Eq
9512 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9513 *
9514 * [ VC: Standalone Document Declaration ]
9515 * TODO The standalone document declaration must have the value "no"
9516 * if any external markup declarations contain declarations of:
9517 * - attributes with default values, if elements to which these
9518 * attributes apply appear in the document without specifications
9519 * of values for these attributes, or
9520 * - entities (other than amp, lt, gt, apos, quot), if references
9521 * to those entities appear in the document, or
9522 * - attributes with values subject to normalization, where the
9523 * attribute appears in the document with a value which will change
9524 * as a result of normalization, or
9525 * - element types with element content, if white space occurs directly
9526 * within any instance of those types.
9527 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009528 * Returns:
9529 * 1 if standalone="yes"
9530 * 0 if standalone="no"
9531 * -2 if standalone attribute is missing or invalid
9532 * (A standalone value of -2 means that the XML declaration was found,
9533 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009534 */
9535
9536int
9537xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009538 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009539
9540 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009541 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009542 SKIP(10);
9543 SKIP_BLANKS;
9544 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009545 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009546 return(standalone);
9547 }
9548 NEXT;
9549 SKIP_BLANKS;
9550 if (RAW == '\''){
9551 NEXT;
9552 if ((RAW == 'n') && (NXT(1) == 'o')) {
9553 standalone = 0;
9554 SKIP(2);
9555 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9556 (NXT(2) == 's')) {
9557 standalone = 1;
9558 SKIP(3);
9559 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009560 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009561 }
9562 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009563 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009564 } else
9565 NEXT;
9566 } else if (RAW == '"'){
9567 NEXT;
9568 if ((RAW == 'n') && (NXT(1) == 'o')) {
9569 standalone = 0;
9570 SKIP(2);
9571 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9572 (NXT(2) == 's')) {
9573 standalone = 1;
9574 SKIP(3);
9575 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009576 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009577 }
9578 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009579 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009580 } else
9581 NEXT;
9582 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009583 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009584 }
9585 }
9586 return(standalone);
9587}
9588
9589/**
9590 * xmlParseXMLDecl:
9591 * @ctxt: an XML parser context
9592 *
9593 * parse an XML declaration header
9594 *
9595 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9596 */
9597
9598void
9599xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9600 xmlChar *version;
9601
9602 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009603 * This value for standalone indicates that the document has an
9604 * XML declaration but it does not have a standalone attribute.
9605 * It will be overwritten later if a standalone attribute is found.
9606 */
9607 ctxt->input->standalone = -2;
9608
9609 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009610 * We know that '<?xml' is here.
9611 */
9612 SKIP(5);
9613
William M. Brack76e95df2003-10-18 16:20:14 +00009614 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009615 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9616 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009617 }
9618 SKIP_BLANKS;
9619
9620 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009621 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009622 */
9623 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009624 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009625 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009626 } else {
9627 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9628 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009629 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009630 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009631 if (ctxt->options & XML_PARSE_OLD10) {
9632 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9633 "Unsupported version '%s'\n",
9634 version);
9635 } else {
9636 if ((version[0] == '1') && ((version[1] == '.'))) {
9637 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9638 "Unsupported version '%s'\n",
9639 version, NULL);
9640 } else {
9641 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9642 "Unsupported version '%s'\n",
9643 version);
9644 }
9645 }
Daniel Veillard19840942001-11-29 16:11:38 +00009646 }
9647 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009648 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009649 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009650 }
Owen Taylor3473f882001-02-23 17:55:21 +00009651
9652 /*
9653 * We may have the encoding declaration
9654 */
William M. Brack76e95df2003-10-18 16:20:14 +00009655 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009656 if ((RAW == '?') && (NXT(1) == '>')) {
9657 SKIP(2);
9658 return;
9659 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009660 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009661 }
9662 xmlParseEncodingDecl(ctxt);
9663 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9664 /*
9665 * The XML REC instructs us to stop parsing right here
9666 */
9667 return;
9668 }
9669
9670 /*
9671 * We may have the standalone status.
9672 */
William M. Brack76e95df2003-10-18 16:20:14 +00009673 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009674 if ((RAW == '?') && (NXT(1) == '>')) {
9675 SKIP(2);
9676 return;
9677 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009678 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009679 }
9680 SKIP_BLANKS;
9681 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9682
9683 SKIP_BLANKS;
9684 if ((RAW == '?') && (NXT(1) == '>')) {
9685 SKIP(2);
9686 } else if (RAW == '>') {
9687 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009688 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009689 NEXT;
9690 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009691 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009692 MOVETO_ENDTAG(CUR_PTR);
9693 NEXT;
9694 }
9695}
9696
9697/**
9698 * xmlParseMisc:
9699 * @ctxt: an XML parser context
9700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009701 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009702 *
9703 * [27] Misc ::= Comment | PI | S
9704 */
9705
9706void
9707xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009708 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009709 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009710 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009711 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009712 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009713 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009714 NEXT;
9715 } else
9716 xmlParseComment(ctxt);
9717 }
9718}
9719
9720/**
9721 * xmlParseDocument:
9722 * @ctxt: an XML parser context
9723 *
9724 * parse an XML document (and build a tree if using the standard SAX
9725 * interface).
9726 *
9727 * [1] document ::= prolog element Misc*
9728 *
9729 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9730 *
9731 * Returns 0, -1 in case of error. the parser context is augmented
9732 * as a result of the parsing.
9733 */
9734
9735int
9736xmlParseDocument(xmlParserCtxtPtr ctxt) {
9737 xmlChar start[4];
9738 xmlCharEncoding enc;
9739
9740 xmlInitParser();
9741
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009742 if ((ctxt == NULL) || (ctxt->input == NULL))
9743 return(-1);
9744
Owen Taylor3473f882001-02-23 17:55:21 +00009745 GROW;
9746
9747 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009748 * SAX: detecting the level.
9749 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009750 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009751
9752 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009753 * SAX: beginning of the document processing.
9754 */
9755 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9756 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9757
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009758 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9759 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009760 /*
9761 * Get the 4 first bytes and decode the charset
9762 * if enc != XML_CHAR_ENCODING_NONE
9763 * plug some encoding conversion routines.
9764 */
9765 start[0] = RAW;
9766 start[1] = NXT(1);
9767 start[2] = NXT(2);
9768 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009769 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009770 if (enc != XML_CHAR_ENCODING_NONE) {
9771 xmlSwitchEncoding(ctxt, enc);
9772 }
Owen Taylor3473f882001-02-23 17:55:21 +00009773 }
9774
9775
9776 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009777 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009778 }
9779
9780 /*
9781 * Check for the XMLDecl in the Prolog.
9782 */
9783 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009784 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009785
9786 /*
9787 * Note that we will switch encoding on the fly.
9788 */
9789 xmlParseXMLDecl(ctxt);
9790 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9791 /*
9792 * The XML REC instructs us to stop parsing right here
9793 */
9794 return(-1);
9795 }
9796 ctxt->standalone = ctxt->input->standalone;
9797 SKIP_BLANKS;
9798 } else {
9799 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9800 }
9801 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9802 ctxt->sax->startDocument(ctxt->userData);
9803
9804 /*
9805 * The Misc part of the Prolog
9806 */
9807 GROW;
9808 xmlParseMisc(ctxt);
9809
9810 /*
9811 * Then possibly doc type declaration(s) and more Misc
9812 * (doctypedecl Misc*)?
9813 */
9814 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009815 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009816
9817 ctxt->inSubset = 1;
9818 xmlParseDocTypeDecl(ctxt);
9819 if (RAW == '[') {
9820 ctxt->instate = XML_PARSER_DTD;
9821 xmlParseInternalSubset(ctxt);
9822 }
9823
9824 /*
9825 * Create and update the external subset.
9826 */
9827 ctxt->inSubset = 2;
9828 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9829 (!ctxt->disableSAX))
9830 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9831 ctxt->extSubSystem, ctxt->extSubURI);
9832 ctxt->inSubset = 0;
9833
Daniel Veillardac4118d2008-01-11 05:27:32 +00009834 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009835
9836 ctxt->instate = XML_PARSER_PROLOG;
9837 xmlParseMisc(ctxt);
9838 }
9839
9840 /*
9841 * Time to start parsing the tree itself
9842 */
9843 GROW;
9844 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009845 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9846 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009847 } else {
9848 ctxt->instate = XML_PARSER_CONTENT;
9849 xmlParseElement(ctxt);
9850 ctxt->instate = XML_PARSER_EPILOG;
9851
9852
9853 /*
9854 * The Misc part at the end
9855 */
9856 xmlParseMisc(ctxt);
9857
Daniel Veillard561b7f82002-03-20 21:55:57 +00009858 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009859 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009860 }
9861 ctxt->instate = XML_PARSER_EOF;
9862 }
9863
9864 /*
9865 * SAX: end of the document processing.
9866 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009867 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009868 ctxt->sax->endDocument(ctxt->userData);
9869
Daniel Veillard5997aca2002-03-18 18:36:20 +00009870 /*
9871 * Remove locally kept entity definitions if the tree was not built
9872 */
9873 if ((ctxt->myDoc != NULL) &&
9874 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9875 xmlFreeDoc(ctxt->myDoc);
9876 ctxt->myDoc = NULL;
9877 }
9878
Daniel Veillardc7612992002-02-17 22:47:37 +00009879 if (! ctxt->wellFormed) {
9880 ctxt->valid = 0;
9881 return(-1);
9882 }
Owen Taylor3473f882001-02-23 17:55:21 +00009883 return(0);
9884}
9885
9886/**
9887 * xmlParseExtParsedEnt:
9888 * @ctxt: an XML parser context
9889 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009890 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009891 * An external general parsed entity is well-formed if it matches the
9892 * production labeled extParsedEnt.
9893 *
9894 * [78] extParsedEnt ::= TextDecl? content
9895 *
9896 * Returns 0, -1 in case of error. the parser context is augmented
9897 * as a result of the parsing.
9898 */
9899
9900int
9901xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9902 xmlChar start[4];
9903 xmlCharEncoding enc;
9904
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009905 if ((ctxt == NULL) || (ctxt->input == NULL))
9906 return(-1);
9907
Owen Taylor3473f882001-02-23 17:55:21 +00009908 xmlDefaultSAXHandlerInit();
9909
Daniel Veillard309f81d2003-09-23 09:02:53 +00009910 xmlDetectSAX2(ctxt);
9911
Owen Taylor3473f882001-02-23 17:55:21 +00009912 GROW;
9913
9914 /*
9915 * SAX: beginning of the document processing.
9916 */
9917 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9918 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9919
9920 /*
9921 * Get the 4 first bytes and decode the charset
9922 * if enc != XML_CHAR_ENCODING_NONE
9923 * plug some encoding conversion routines.
9924 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009925 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9926 start[0] = RAW;
9927 start[1] = NXT(1);
9928 start[2] = NXT(2);
9929 start[3] = NXT(3);
9930 enc = xmlDetectCharEncoding(start, 4);
9931 if (enc != XML_CHAR_ENCODING_NONE) {
9932 xmlSwitchEncoding(ctxt, enc);
9933 }
Owen Taylor3473f882001-02-23 17:55:21 +00009934 }
9935
9936
9937 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009938 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009939 }
9940
9941 /*
9942 * Check for the XMLDecl in the Prolog.
9943 */
9944 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009945 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009946
9947 /*
9948 * Note that we will switch encoding on the fly.
9949 */
9950 xmlParseXMLDecl(ctxt);
9951 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9952 /*
9953 * The XML REC instructs us to stop parsing right here
9954 */
9955 return(-1);
9956 }
9957 SKIP_BLANKS;
9958 } else {
9959 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9960 }
9961 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9962 ctxt->sax->startDocument(ctxt->userData);
9963
9964 /*
9965 * Doing validity checking on chunk doesn't make sense
9966 */
9967 ctxt->instate = XML_PARSER_CONTENT;
9968 ctxt->validate = 0;
9969 ctxt->loadsubset = 0;
9970 ctxt->depth = 0;
9971
9972 xmlParseContent(ctxt);
9973
9974 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009975 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009976 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009977 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009978 }
9979
9980 /*
9981 * SAX: end of the document processing.
9982 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009983 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009984 ctxt->sax->endDocument(ctxt->userData);
9985
9986 if (! ctxt->wellFormed) return(-1);
9987 return(0);
9988}
9989
Daniel Veillard73b013f2003-09-30 12:36:01 +00009990#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009991/************************************************************************
9992 * *
9993 * Progressive parsing interfaces *
9994 * *
9995 ************************************************************************/
9996
9997/**
9998 * xmlParseLookupSequence:
9999 * @ctxt: an XML parser context
10000 * @first: the first char to lookup
10001 * @next: the next char to lookup or zero
10002 * @third: the next char to lookup or zero
10003 *
10004 * Try to find if a sequence (first, next, third) or just (first next) or
10005 * (first) is available in the input stream.
10006 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10007 * to avoid rescanning sequences of bytes, it DOES change the state of the
10008 * parser, do not use liberally.
10009 *
10010 * Returns the index to the current parsing point if the full sequence
10011 * is available, -1 otherwise.
10012 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010013static int
Owen Taylor3473f882001-02-23 17:55:21 +000010014xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10015 xmlChar next, xmlChar third) {
10016 int base, len;
10017 xmlParserInputPtr in;
10018 const xmlChar *buf;
10019
10020 in = ctxt->input;
10021 if (in == NULL) return(-1);
10022 base = in->cur - in->base;
10023 if (base < 0) return(-1);
10024 if (ctxt->checkIndex > base)
10025 base = ctxt->checkIndex;
10026 if (in->buf == NULL) {
10027 buf = in->base;
10028 len = in->length;
10029 } else {
10030 buf = in->buf->buffer->content;
10031 len = in->buf->buffer->use;
10032 }
10033 /* take into account the sequence length */
10034 if (third) len -= 2;
10035 else if (next) len --;
10036 for (;base < len;base++) {
10037 if (buf[base] == first) {
10038 if (third != 0) {
10039 if ((buf[base + 1] != next) ||
10040 (buf[base + 2] != third)) continue;
10041 } else if (next != 0) {
10042 if (buf[base + 1] != next) continue;
10043 }
10044 ctxt->checkIndex = 0;
10045#ifdef DEBUG_PUSH
10046 if (next == 0)
10047 xmlGenericError(xmlGenericErrorContext,
10048 "PP: lookup '%c' found at %d\n",
10049 first, base);
10050 else if (third == 0)
10051 xmlGenericError(xmlGenericErrorContext,
10052 "PP: lookup '%c%c' found at %d\n",
10053 first, next, base);
10054 else
10055 xmlGenericError(xmlGenericErrorContext,
10056 "PP: lookup '%c%c%c' found at %d\n",
10057 first, next, third, base);
10058#endif
10059 return(base - (in->cur - in->base));
10060 }
10061 }
10062 ctxt->checkIndex = base;
10063#ifdef DEBUG_PUSH
10064 if (next == 0)
10065 xmlGenericError(xmlGenericErrorContext,
10066 "PP: lookup '%c' failed\n", first);
10067 else if (third == 0)
10068 xmlGenericError(xmlGenericErrorContext,
10069 "PP: lookup '%c%c' failed\n", first, next);
10070 else
10071 xmlGenericError(xmlGenericErrorContext,
10072 "PP: lookup '%c%c%c' failed\n", first, next, third);
10073#endif
10074 return(-1);
10075}
10076
10077/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010078 * xmlParseGetLasts:
10079 * @ctxt: an XML parser context
10080 * @lastlt: pointer to store the last '<' from the input
10081 * @lastgt: pointer to store the last '>' from the input
10082 *
10083 * Lookup the last < and > in the current chunk
10084 */
10085static void
10086xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10087 const xmlChar **lastgt) {
10088 const xmlChar *tmp;
10089
10090 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10091 xmlGenericError(xmlGenericErrorContext,
10092 "Internal error: xmlParseGetLasts\n");
10093 return;
10094 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010095 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010096 tmp = ctxt->input->end;
10097 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010098 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010099 if (tmp < ctxt->input->base) {
10100 *lastlt = NULL;
10101 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010102 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010103 *lastlt = tmp;
10104 tmp++;
10105 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10106 if (*tmp == '\'') {
10107 tmp++;
10108 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10109 if (tmp < ctxt->input->end) tmp++;
10110 } else if (*tmp == '"') {
10111 tmp++;
10112 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10113 if (tmp < ctxt->input->end) tmp++;
10114 } else
10115 tmp++;
10116 }
10117 if (tmp < ctxt->input->end)
10118 *lastgt = tmp;
10119 else {
10120 tmp = *lastlt;
10121 tmp--;
10122 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10123 if (tmp >= ctxt->input->base)
10124 *lastgt = tmp;
10125 else
10126 *lastgt = NULL;
10127 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010128 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010129 } else {
10130 *lastlt = NULL;
10131 *lastgt = NULL;
10132 }
10133}
10134/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010135 * xmlCheckCdataPush:
10136 * @cur: pointer to the bock of characters
10137 * @len: length of the block in bytes
10138 *
10139 * Check that the block of characters is okay as SCdata content [20]
10140 *
10141 * Returns the number of bytes to pass if okay, a negative index where an
10142 * UTF-8 error occured otherwise
10143 */
10144static int
10145xmlCheckCdataPush(const xmlChar *utf, int len) {
10146 int ix;
10147 unsigned char c;
10148 int codepoint;
10149
10150 if ((utf == NULL) || (len <= 0))
10151 return(0);
10152
10153 for (ix = 0; ix < len;) { /* string is 0-terminated */
10154 c = utf[ix];
10155 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10156 if (c >= 0x20)
10157 ix++;
10158 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10159 ix++;
10160 else
10161 return(-ix);
10162 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10163 if (ix + 2 > len) return(ix);
10164 if ((utf[ix+1] & 0xc0 ) != 0x80)
10165 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010166 codepoint = (utf[ix] & 0x1f) << 6;
10167 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010168 if (!xmlIsCharQ(codepoint))
10169 return(-ix);
10170 ix += 2;
10171 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10172 if (ix + 3 > len) return(ix);
10173 if (((utf[ix+1] & 0xc0) != 0x80) ||
10174 ((utf[ix+2] & 0xc0) != 0x80))
10175 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010176 codepoint = (utf[ix] & 0xf) << 12;
10177 codepoint |= (utf[ix+1] & 0x3f) << 6;
10178 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010179 if (!xmlIsCharQ(codepoint))
10180 return(-ix);
10181 ix += 3;
10182 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10183 if (ix + 4 > len) return(ix);
10184 if (((utf[ix+1] & 0xc0) != 0x80) ||
10185 ((utf[ix+2] & 0xc0) != 0x80) ||
10186 ((utf[ix+3] & 0xc0) != 0x80))
10187 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010188 codepoint = (utf[ix] & 0x7) << 18;
10189 codepoint |= (utf[ix+1] & 0x3f) << 12;
10190 codepoint |= (utf[ix+2] & 0x3f) << 6;
10191 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010192 if (!xmlIsCharQ(codepoint))
10193 return(-ix);
10194 ix += 4;
10195 } else /* unknown encoding */
10196 return(-ix);
10197 }
10198 return(ix);
10199}
10200
10201/**
Owen Taylor3473f882001-02-23 17:55:21 +000010202 * xmlParseTryOrFinish:
10203 * @ctxt: an XML parser context
10204 * @terminate: last chunk indicator
10205 *
10206 * Try to progress on parsing
10207 *
10208 * Returns zero if no parsing was possible
10209 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010210static int
Owen Taylor3473f882001-02-23 17:55:21 +000010211xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10212 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010213 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010214 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010215 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010216
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010217 if (ctxt->input == NULL)
10218 return(0);
10219
Owen Taylor3473f882001-02-23 17:55:21 +000010220#ifdef DEBUG_PUSH
10221 switch (ctxt->instate) {
10222 case XML_PARSER_EOF:
10223 xmlGenericError(xmlGenericErrorContext,
10224 "PP: try EOF\n"); break;
10225 case XML_PARSER_START:
10226 xmlGenericError(xmlGenericErrorContext,
10227 "PP: try START\n"); break;
10228 case XML_PARSER_MISC:
10229 xmlGenericError(xmlGenericErrorContext,
10230 "PP: try MISC\n");break;
10231 case XML_PARSER_COMMENT:
10232 xmlGenericError(xmlGenericErrorContext,
10233 "PP: try COMMENT\n");break;
10234 case XML_PARSER_PROLOG:
10235 xmlGenericError(xmlGenericErrorContext,
10236 "PP: try PROLOG\n");break;
10237 case XML_PARSER_START_TAG:
10238 xmlGenericError(xmlGenericErrorContext,
10239 "PP: try START_TAG\n");break;
10240 case XML_PARSER_CONTENT:
10241 xmlGenericError(xmlGenericErrorContext,
10242 "PP: try CONTENT\n");break;
10243 case XML_PARSER_CDATA_SECTION:
10244 xmlGenericError(xmlGenericErrorContext,
10245 "PP: try CDATA_SECTION\n");break;
10246 case XML_PARSER_END_TAG:
10247 xmlGenericError(xmlGenericErrorContext,
10248 "PP: try END_TAG\n");break;
10249 case XML_PARSER_ENTITY_DECL:
10250 xmlGenericError(xmlGenericErrorContext,
10251 "PP: try ENTITY_DECL\n");break;
10252 case XML_PARSER_ENTITY_VALUE:
10253 xmlGenericError(xmlGenericErrorContext,
10254 "PP: try ENTITY_VALUE\n");break;
10255 case XML_PARSER_ATTRIBUTE_VALUE:
10256 xmlGenericError(xmlGenericErrorContext,
10257 "PP: try ATTRIBUTE_VALUE\n");break;
10258 case XML_PARSER_DTD:
10259 xmlGenericError(xmlGenericErrorContext,
10260 "PP: try DTD\n");break;
10261 case XML_PARSER_EPILOG:
10262 xmlGenericError(xmlGenericErrorContext,
10263 "PP: try EPILOG\n");break;
10264 case XML_PARSER_PI:
10265 xmlGenericError(xmlGenericErrorContext,
10266 "PP: try PI\n");break;
10267 case XML_PARSER_IGNORE:
10268 xmlGenericError(xmlGenericErrorContext,
10269 "PP: try IGNORE\n");break;
10270 }
10271#endif
10272
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010273 if ((ctxt->input != NULL) &&
10274 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010275 xmlSHRINK(ctxt);
10276 ctxt->checkIndex = 0;
10277 }
10278 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010279
Daniel Veillarda880b122003-04-21 21:36:41 +000010280 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010281 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010282 return(0);
10283
10284
Owen Taylor3473f882001-02-23 17:55:21 +000010285 /*
10286 * Pop-up of finished entities.
10287 */
10288 while ((RAW == 0) && (ctxt->inputNr > 1))
10289 xmlPopInput(ctxt);
10290
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010291 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010292 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010293 avail = ctxt->input->length -
10294 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010295 else {
10296 /*
10297 * If we are operating on converted input, try to flush
10298 * remainng chars to avoid them stalling in the non-converted
10299 * buffer.
10300 */
10301 if ((ctxt->input->buf->raw != NULL) &&
10302 (ctxt->input->buf->raw->use > 0)) {
10303 int base = ctxt->input->base -
10304 ctxt->input->buf->buffer->content;
10305 int current = ctxt->input->cur - ctxt->input->base;
10306
10307 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10308 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10309 ctxt->input->cur = ctxt->input->base + current;
10310 ctxt->input->end =
10311 &ctxt->input->buf->buffer->content[
10312 ctxt->input->buf->buffer->use];
10313 }
10314 avail = ctxt->input->buf->buffer->use -
10315 (ctxt->input->cur - ctxt->input->base);
10316 }
Owen Taylor3473f882001-02-23 17:55:21 +000010317 if (avail < 1)
10318 goto done;
10319 switch (ctxt->instate) {
10320 case XML_PARSER_EOF:
10321 /*
10322 * Document parsing is done !
10323 */
10324 goto done;
10325 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010326 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10327 xmlChar start[4];
10328 xmlCharEncoding enc;
10329
10330 /*
10331 * Very first chars read from the document flow.
10332 */
10333 if (avail < 4)
10334 goto done;
10335
10336 /*
10337 * Get the 4 first bytes and decode the charset
10338 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010339 * plug some encoding conversion routines,
10340 * else xmlSwitchEncoding will set to (default)
10341 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010342 */
10343 start[0] = RAW;
10344 start[1] = NXT(1);
10345 start[2] = NXT(2);
10346 start[3] = NXT(3);
10347 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010348 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010349 break;
10350 }
Owen Taylor3473f882001-02-23 17:55:21 +000010351
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010352 if (avail < 2)
10353 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010354 cur = ctxt->input->cur[0];
10355 next = ctxt->input->cur[1];
10356 if (cur == 0) {
10357 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10358 ctxt->sax->setDocumentLocator(ctxt->userData,
10359 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010360 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010361 ctxt->instate = XML_PARSER_EOF;
10362#ifdef DEBUG_PUSH
10363 xmlGenericError(xmlGenericErrorContext,
10364 "PP: entering EOF\n");
10365#endif
10366 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10367 ctxt->sax->endDocument(ctxt->userData);
10368 goto done;
10369 }
10370 if ((cur == '<') && (next == '?')) {
10371 /* PI or XML decl */
10372 if (avail < 5) return(ret);
10373 if ((!terminate) &&
10374 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10375 return(ret);
10376 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10377 ctxt->sax->setDocumentLocator(ctxt->userData,
10378 &xmlDefaultSAXLocator);
10379 if ((ctxt->input->cur[2] == 'x') &&
10380 (ctxt->input->cur[3] == 'm') &&
10381 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010382 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010383 ret += 5;
10384#ifdef DEBUG_PUSH
10385 xmlGenericError(xmlGenericErrorContext,
10386 "PP: Parsing XML Decl\n");
10387#endif
10388 xmlParseXMLDecl(ctxt);
10389 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10390 /*
10391 * The XML REC instructs us to stop parsing right
10392 * here
10393 */
10394 ctxt->instate = XML_PARSER_EOF;
10395 return(0);
10396 }
10397 ctxt->standalone = ctxt->input->standalone;
10398 if ((ctxt->encoding == NULL) &&
10399 (ctxt->input->encoding != NULL))
10400 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10401 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10402 (!ctxt->disableSAX))
10403 ctxt->sax->startDocument(ctxt->userData);
10404 ctxt->instate = XML_PARSER_MISC;
10405#ifdef DEBUG_PUSH
10406 xmlGenericError(xmlGenericErrorContext,
10407 "PP: entering MISC\n");
10408#endif
10409 } else {
10410 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10411 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10412 (!ctxt->disableSAX))
10413 ctxt->sax->startDocument(ctxt->userData);
10414 ctxt->instate = XML_PARSER_MISC;
10415#ifdef DEBUG_PUSH
10416 xmlGenericError(xmlGenericErrorContext,
10417 "PP: entering MISC\n");
10418#endif
10419 }
10420 } else {
10421 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10422 ctxt->sax->setDocumentLocator(ctxt->userData,
10423 &xmlDefaultSAXLocator);
10424 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010425 if (ctxt->version == NULL) {
10426 xmlErrMemory(ctxt, NULL);
10427 break;
10428 }
Owen Taylor3473f882001-02-23 17:55:21 +000010429 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10430 (!ctxt->disableSAX))
10431 ctxt->sax->startDocument(ctxt->userData);
10432 ctxt->instate = XML_PARSER_MISC;
10433#ifdef DEBUG_PUSH
10434 xmlGenericError(xmlGenericErrorContext,
10435 "PP: entering MISC\n");
10436#endif
10437 }
10438 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010439 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010440 const xmlChar *name;
10441 const xmlChar *prefix;
10442 const xmlChar *URI;
10443 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010444
10445 if ((avail < 2) && (ctxt->inputNr == 1))
10446 goto done;
10447 cur = ctxt->input->cur[0];
10448 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010449 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010450 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010451 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10452 ctxt->sax->endDocument(ctxt->userData);
10453 goto done;
10454 }
10455 if (!terminate) {
10456 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010457 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010458 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010459 goto done;
10460 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10461 goto done;
10462 }
10463 }
10464 if (ctxt->spaceNr == 0)
10465 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010466 else if (*ctxt->space == -2)
10467 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010468 else
10469 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010470#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010471 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010472#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010473 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010474#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010475 else
10476 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010477#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010478 if (name == NULL) {
10479 spacePop(ctxt);
10480 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010481 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10482 ctxt->sax->endDocument(ctxt->userData);
10483 goto done;
10484 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010485#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010486 /*
10487 * [ VC: Root Element Type ]
10488 * The Name in the document type declaration must match
10489 * the element type of the root element.
10490 */
10491 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10492 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10493 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010494#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010495
10496 /*
10497 * Check for an Empty Element.
10498 */
10499 if ((RAW == '/') && (NXT(1) == '>')) {
10500 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010501
10502 if (ctxt->sax2) {
10503 if ((ctxt->sax != NULL) &&
10504 (ctxt->sax->endElementNs != NULL) &&
10505 (!ctxt->disableSAX))
10506 ctxt->sax->endElementNs(ctxt->userData, name,
10507 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010508 if (ctxt->nsNr - nsNr > 0)
10509 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010510#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010511 } else {
10512 if ((ctxt->sax != NULL) &&
10513 (ctxt->sax->endElement != NULL) &&
10514 (!ctxt->disableSAX))
10515 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010516#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010517 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010518 spacePop(ctxt);
10519 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010520 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010521 } else {
10522 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010523 }
10524 break;
10525 }
10526 if (RAW == '>') {
10527 NEXT;
10528 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010529 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010530 "Couldn't find end of Start Tag %s\n",
10531 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010532 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010533 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010534 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010535 if (ctxt->sax2)
10536 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010537#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010538 else
10539 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010540#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010541
Daniel Veillarda880b122003-04-21 21:36:41 +000010542 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010543 break;
10544 }
10545 case XML_PARSER_CONTENT: {
10546 const xmlChar *test;
10547 unsigned int cons;
10548 if ((avail < 2) && (ctxt->inputNr == 1))
10549 goto done;
10550 cur = ctxt->input->cur[0];
10551 next = ctxt->input->cur[1];
10552
10553 test = CUR_PTR;
10554 cons = ctxt->input->consumed;
10555 if ((cur == '<') && (next == '/')) {
10556 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010557 break;
10558 } else if ((cur == '<') && (next == '?')) {
10559 if ((!terminate) &&
10560 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10561 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010562 xmlParsePI(ctxt);
10563 } else if ((cur == '<') && (next != '!')) {
10564 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010565 break;
10566 } else if ((cur == '<') && (next == '!') &&
10567 (ctxt->input->cur[2] == '-') &&
10568 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010569 int term;
10570
10571 if (avail < 4)
10572 goto done;
10573 ctxt->input->cur += 4;
10574 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10575 ctxt->input->cur -= 4;
10576 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010577 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010578 xmlParseComment(ctxt);
10579 ctxt->instate = XML_PARSER_CONTENT;
10580 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10581 (ctxt->input->cur[2] == '[') &&
10582 (ctxt->input->cur[3] == 'C') &&
10583 (ctxt->input->cur[4] == 'D') &&
10584 (ctxt->input->cur[5] == 'A') &&
10585 (ctxt->input->cur[6] == 'T') &&
10586 (ctxt->input->cur[7] == 'A') &&
10587 (ctxt->input->cur[8] == '[')) {
10588 SKIP(9);
10589 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010590 break;
10591 } else if ((cur == '<') && (next == '!') &&
10592 (avail < 9)) {
10593 goto done;
10594 } else if (cur == '&') {
10595 if ((!terminate) &&
10596 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10597 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010598 xmlParseReference(ctxt);
10599 } else {
10600 /* TODO Avoid the extra copy, handle directly !!! */
10601 /*
10602 * Goal of the following test is:
10603 * - minimize calls to the SAX 'character' callback
10604 * when they are mergeable
10605 * - handle a problem for isBlank when we only parse
10606 * a sequence of blank chars and the next one is
10607 * not available to check against '<' presence.
10608 * - tries to homogenize the differences in SAX
10609 * callbacks between the push and pull versions
10610 * of the parser.
10611 */
10612 if ((ctxt->inputNr == 1) &&
10613 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10614 if (!terminate) {
10615 if (ctxt->progressive) {
10616 if ((lastlt == NULL) ||
10617 (ctxt->input->cur > lastlt))
10618 goto done;
10619 } else if (xmlParseLookupSequence(ctxt,
10620 '<', 0, 0) < 0) {
10621 goto done;
10622 }
10623 }
10624 }
10625 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010626 xmlParseCharData(ctxt, 0);
10627 }
10628 /*
10629 * Pop-up of finished entities.
10630 */
10631 while ((RAW == 0) && (ctxt->inputNr > 1))
10632 xmlPopInput(ctxt);
10633 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010634 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10635 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010636 ctxt->instate = XML_PARSER_EOF;
10637 break;
10638 }
10639 break;
10640 }
10641 case XML_PARSER_END_TAG:
10642 if (avail < 2)
10643 goto done;
10644 if (!terminate) {
10645 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010646 /* > can be found unescaped in attribute values */
10647 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010648 goto done;
10649 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10650 goto done;
10651 }
10652 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010653 if (ctxt->sax2) {
10654 xmlParseEndTag2(ctxt,
10655 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10656 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010657 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010658 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010659 }
10660#ifdef LIBXML_SAX1_ENABLED
10661 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010662 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010663#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010664 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010665 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010666 } else {
10667 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010668 }
10669 break;
10670 case XML_PARSER_CDATA_SECTION: {
10671 /*
10672 * The Push mode need to have the SAX callback for
10673 * cdataBlock merge back contiguous callbacks.
10674 */
10675 int base;
10676
10677 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10678 if (base < 0) {
10679 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010680 int tmp;
10681
10682 tmp = xmlCheckCdataPush(ctxt->input->cur,
10683 XML_PARSER_BIG_BUFFER_SIZE);
10684 if (tmp < 0) {
10685 tmp = -tmp;
10686 ctxt->input->cur += tmp;
10687 goto encoding_error;
10688 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010689 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10690 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010691 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010692 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010693 else if (ctxt->sax->characters != NULL)
10694 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010695 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010696 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010697 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010698 ctxt->checkIndex = 0;
10699 }
10700 goto done;
10701 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010702 int tmp;
10703
10704 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10705 if ((tmp < 0) || (tmp != base)) {
10706 tmp = -tmp;
10707 ctxt->input->cur += tmp;
10708 goto encoding_error;
10709 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010710 if ((ctxt->sax != NULL) && (base == 0) &&
10711 (ctxt->sax->cdataBlock != NULL) &&
10712 (!ctxt->disableSAX)) {
10713 /*
10714 * Special case to provide identical behaviour
10715 * between pull and push parsers on empty CDATA
10716 * sections
10717 */
10718 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10719 (!strncmp((const char *)&ctxt->input->cur[-9],
10720 "<![CDATA[", 9)))
10721 ctxt->sax->cdataBlock(ctxt->userData,
10722 BAD_CAST "", 0);
10723 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010724 (!ctxt->disableSAX)) {
10725 if (ctxt->sax->cdataBlock != NULL)
10726 ctxt->sax->cdataBlock(ctxt->userData,
10727 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010728 else if (ctxt->sax->characters != NULL)
10729 ctxt->sax->characters(ctxt->userData,
10730 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010731 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010732 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010733 ctxt->checkIndex = 0;
10734 ctxt->instate = XML_PARSER_CONTENT;
10735#ifdef DEBUG_PUSH
10736 xmlGenericError(xmlGenericErrorContext,
10737 "PP: entering CONTENT\n");
10738#endif
10739 }
10740 break;
10741 }
Owen Taylor3473f882001-02-23 17:55:21 +000010742 case XML_PARSER_MISC:
10743 SKIP_BLANKS;
10744 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010745 avail = ctxt->input->length -
10746 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010747 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010748 avail = ctxt->input->buf->buffer->use -
10749 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010750 if (avail < 2)
10751 goto done;
10752 cur = ctxt->input->cur[0];
10753 next = ctxt->input->cur[1];
10754 if ((cur == '<') && (next == '?')) {
10755 if ((!terminate) &&
10756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10757 goto done;
10758#ifdef DEBUG_PUSH
10759 xmlGenericError(xmlGenericErrorContext,
10760 "PP: Parsing PI\n");
10761#endif
10762 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010763 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010764 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010765 (ctxt->input->cur[2] == '-') &&
10766 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010767 if ((!terminate) &&
10768 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10769 goto done;
10770#ifdef DEBUG_PUSH
10771 xmlGenericError(xmlGenericErrorContext,
10772 "PP: Parsing Comment\n");
10773#endif
10774 xmlParseComment(ctxt);
10775 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010776 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010777 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010778 (ctxt->input->cur[2] == 'D') &&
10779 (ctxt->input->cur[3] == 'O') &&
10780 (ctxt->input->cur[4] == 'C') &&
10781 (ctxt->input->cur[5] == 'T') &&
10782 (ctxt->input->cur[6] == 'Y') &&
10783 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010784 (ctxt->input->cur[8] == 'E')) {
10785 if ((!terminate) &&
10786 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10787 goto done;
10788#ifdef DEBUG_PUSH
10789 xmlGenericError(xmlGenericErrorContext,
10790 "PP: Parsing internal subset\n");
10791#endif
10792 ctxt->inSubset = 1;
10793 xmlParseDocTypeDecl(ctxt);
10794 if (RAW == '[') {
10795 ctxt->instate = XML_PARSER_DTD;
10796#ifdef DEBUG_PUSH
10797 xmlGenericError(xmlGenericErrorContext,
10798 "PP: entering DTD\n");
10799#endif
10800 } else {
10801 /*
10802 * Create and update the external subset.
10803 */
10804 ctxt->inSubset = 2;
10805 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10806 (ctxt->sax->externalSubset != NULL))
10807 ctxt->sax->externalSubset(ctxt->userData,
10808 ctxt->intSubName, ctxt->extSubSystem,
10809 ctxt->extSubURI);
10810 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010811 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010812 ctxt->instate = XML_PARSER_PROLOG;
10813#ifdef DEBUG_PUSH
10814 xmlGenericError(xmlGenericErrorContext,
10815 "PP: entering PROLOG\n");
10816#endif
10817 }
10818 } else if ((cur == '<') && (next == '!') &&
10819 (avail < 9)) {
10820 goto done;
10821 } else {
10822 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010823 ctxt->progressive = 1;
10824 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010825#ifdef DEBUG_PUSH
10826 xmlGenericError(xmlGenericErrorContext,
10827 "PP: entering START_TAG\n");
10828#endif
10829 }
10830 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010831 case XML_PARSER_PROLOG:
10832 SKIP_BLANKS;
10833 if (ctxt->input->buf == NULL)
10834 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10835 else
10836 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10837 if (avail < 2)
10838 goto done;
10839 cur = ctxt->input->cur[0];
10840 next = ctxt->input->cur[1];
10841 if ((cur == '<') && (next == '?')) {
10842 if ((!terminate) &&
10843 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10844 goto done;
10845#ifdef DEBUG_PUSH
10846 xmlGenericError(xmlGenericErrorContext,
10847 "PP: Parsing PI\n");
10848#endif
10849 xmlParsePI(ctxt);
10850 } else if ((cur == '<') && (next == '!') &&
10851 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10852 if ((!terminate) &&
10853 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10854 goto done;
10855#ifdef DEBUG_PUSH
10856 xmlGenericError(xmlGenericErrorContext,
10857 "PP: Parsing Comment\n");
10858#endif
10859 xmlParseComment(ctxt);
10860 ctxt->instate = XML_PARSER_PROLOG;
10861 } else if ((cur == '<') && (next == '!') &&
10862 (avail < 4)) {
10863 goto done;
10864 } else {
10865 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010866 if (ctxt->progressive == 0)
10867 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010868 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010869#ifdef DEBUG_PUSH
10870 xmlGenericError(xmlGenericErrorContext,
10871 "PP: entering START_TAG\n");
10872#endif
10873 }
10874 break;
10875 case XML_PARSER_EPILOG:
10876 SKIP_BLANKS;
10877 if (ctxt->input->buf == NULL)
10878 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10879 else
10880 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10881 if (avail < 2)
10882 goto done;
10883 cur = ctxt->input->cur[0];
10884 next = ctxt->input->cur[1];
10885 if ((cur == '<') && (next == '?')) {
10886 if ((!terminate) &&
10887 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10888 goto done;
10889#ifdef DEBUG_PUSH
10890 xmlGenericError(xmlGenericErrorContext,
10891 "PP: Parsing PI\n");
10892#endif
10893 xmlParsePI(ctxt);
10894 ctxt->instate = XML_PARSER_EPILOG;
10895 } else if ((cur == '<') && (next == '!') &&
10896 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10897 if ((!terminate) &&
10898 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10899 goto done;
10900#ifdef DEBUG_PUSH
10901 xmlGenericError(xmlGenericErrorContext,
10902 "PP: Parsing Comment\n");
10903#endif
10904 xmlParseComment(ctxt);
10905 ctxt->instate = XML_PARSER_EPILOG;
10906 } else if ((cur == '<') && (next == '!') &&
10907 (avail < 4)) {
10908 goto done;
10909 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010910 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010911 ctxt->instate = XML_PARSER_EOF;
10912#ifdef DEBUG_PUSH
10913 xmlGenericError(xmlGenericErrorContext,
10914 "PP: entering EOF\n");
10915#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010916 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010917 ctxt->sax->endDocument(ctxt->userData);
10918 goto done;
10919 }
10920 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010921 case XML_PARSER_DTD: {
10922 /*
10923 * Sorry but progressive parsing of the internal subset
10924 * is not expected to be supported. We first check that
10925 * the full content of the internal subset is available and
10926 * the parsing is launched only at that point.
10927 * Internal subset ends up with "']' S? '>'" in an unescaped
10928 * section and not in a ']]>' sequence which are conditional
10929 * sections (whoever argued to keep that crap in XML deserve
10930 * a place in hell !).
10931 */
10932 int base, i;
10933 xmlChar *buf;
10934 xmlChar quote = 0;
10935
10936 base = ctxt->input->cur - ctxt->input->base;
10937 if (base < 0) return(0);
10938 if (ctxt->checkIndex > base)
10939 base = ctxt->checkIndex;
10940 buf = ctxt->input->buf->buffer->content;
10941 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10942 base++) {
10943 if (quote != 0) {
10944 if (buf[base] == quote)
10945 quote = 0;
10946 continue;
10947 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010948 if ((quote == 0) && (buf[base] == '<')) {
10949 int found = 0;
10950 /* special handling of comments */
10951 if (((unsigned int) base + 4 <
10952 ctxt->input->buf->buffer->use) &&
10953 (buf[base + 1] == '!') &&
10954 (buf[base + 2] == '-') &&
10955 (buf[base + 3] == '-')) {
10956 for (;(unsigned int) base + 3 <
10957 ctxt->input->buf->buffer->use; base++) {
10958 if ((buf[base] == '-') &&
10959 (buf[base + 1] == '-') &&
10960 (buf[base + 2] == '>')) {
10961 found = 1;
10962 base += 2;
10963 break;
10964 }
10965 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010966 if (!found) {
10967#if 0
10968 fprintf(stderr, "unfinished comment\n");
10969#endif
10970 break; /* for */
10971 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010972 continue;
10973 }
10974 }
Owen Taylor3473f882001-02-23 17:55:21 +000010975 if (buf[base] == '"') {
10976 quote = '"';
10977 continue;
10978 }
10979 if (buf[base] == '\'') {
10980 quote = '\'';
10981 continue;
10982 }
10983 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010984#if 0
10985 fprintf(stderr, "%c%c%c%c: ", buf[base],
10986 buf[base + 1], buf[base + 2], buf[base + 3]);
10987#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010988 if ((unsigned int) base +1 >=
10989 ctxt->input->buf->buffer->use)
10990 break;
10991 if (buf[base + 1] == ']') {
10992 /* conditional crap, skip both ']' ! */
10993 base++;
10994 continue;
10995 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010996 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010997 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10998 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010999 if (buf[base + i] == '>') {
11000#if 0
11001 fprintf(stderr, "found\n");
11002#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011003 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011004 }
11005 if (!IS_BLANK_CH(buf[base + i])) {
11006#if 0
11007 fprintf(stderr, "not found\n");
11008#endif
11009 goto not_end_of_int_subset;
11010 }
Owen Taylor3473f882001-02-23 17:55:21 +000011011 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011012#if 0
11013 fprintf(stderr, "end of stream\n");
11014#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011015 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011016
Owen Taylor3473f882001-02-23 17:55:21 +000011017 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011018not_end_of_int_subset:
11019 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011020 }
11021 /*
11022 * We didn't find the end of the Internal subset
11023 */
Owen Taylor3473f882001-02-23 17:55:21 +000011024#ifdef DEBUG_PUSH
11025 if (next == 0)
11026 xmlGenericError(xmlGenericErrorContext,
11027 "PP: lookup of int subset end filed\n");
11028#endif
11029 goto done;
11030
11031found_end_int_subset:
11032 xmlParseInternalSubset(ctxt);
11033 ctxt->inSubset = 2;
11034 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11035 (ctxt->sax->externalSubset != NULL))
11036 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11037 ctxt->extSubSystem, ctxt->extSubURI);
11038 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011039 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011040 ctxt->instate = XML_PARSER_PROLOG;
11041 ctxt->checkIndex = 0;
11042#ifdef DEBUG_PUSH
11043 xmlGenericError(xmlGenericErrorContext,
11044 "PP: entering PROLOG\n");
11045#endif
11046 break;
11047 }
11048 case XML_PARSER_COMMENT:
11049 xmlGenericError(xmlGenericErrorContext,
11050 "PP: internal error, state == COMMENT\n");
11051 ctxt->instate = XML_PARSER_CONTENT;
11052#ifdef DEBUG_PUSH
11053 xmlGenericError(xmlGenericErrorContext,
11054 "PP: entering CONTENT\n");
11055#endif
11056 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011057 case XML_PARSER_IGNORE:
11058 xmlGenericError(xmlGenericErrorContext,
11059 "PP: internal error, state == IGNORE");
11060 ctxt->instate = XML_PARSER_DTD;
11061#ifdef DEBUG_PUSH
11062 xmlGenericError(xmlGenericErrorContext,
11063 "PP: entering DTD\n");
11064#endif
11065 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011066 case XML_PARSER_PI:
11067 xmlGenericError(xmlGenericErrorContext,
11068 "PP: internal error, state == PI\n");
11069 ctxt->instate = XML_PARSER_CONTENT;
11070#ifdef DEBUG_PUSH
11071 xmlGenericError(xmlGenericErrorContext,
11072 "PP: entering CONTENT\n");
11073#endif
11074 break;
11075 case XML_PARSER_ENTITY_DECL:
11076 xmlGenericError(xmlGenericErrorContext,
11077 "PP: internal error, state == ENTITY_DECL\n");
11078 ctxt->instate = XML_PARSER_DTD;
11079#ifdef DEBUG_PUSH
11080 xmlGenericError(xmlGenericErrorContext,
11081 "PP: entering DTD\n");
11082#endif
11083 break;
11084 case XML_PARSER_ENTITY_VALUE:
11085 xmlGenericError(xmlGenericErrorContext,
11086 "PP: internal error, state == ENTITY_VALUE\n");
11087 ctxt->instate = XML_PARSER_CONTENT;
11088#ifdef DEBUG_PUSH
11089 xmlGenericError(xmlGenericErrorContext,
11090 "PP: entering DTD\n");
11091#endif
11092 break;
11093 case XML_PARSER_ATTRIBUTE_VALUE:
11094 xmlGenericError(xmlGenericErrorContext,
11095 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11096 ctxt->instate = XML_PARSER_START_TAG;
11097#ifdef DEBUG_PUSH
11098 xmlGenericError(xmlGenericErrorContext,
11099 "PP: entering START_TAG\n");
11100#endif
11101 break;
11102 case XML_PARSER_SYSTEM_LITERAL:
11103 xmlGenericError(xmlGenericErrorContext,
11104 "PP: internal error, state == SYSTEM_LITERAL\n");
11105 ctxt->instate = XML_PARSER_START_TAG;
11106#ifdef DEBUG_PUSH
11107 xmlGenericError(xmlGenericErrorContext,
11108 "PP: entering START_TAG\n");
11109#endif
11110 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011111 case XML_PARSER_PUBLIC_LITERAL:
11112 xmlGenericError(xmlGenericErrorContext,
11113 "PP: internal error, state == PUBLIC_LITERAL\n");
11114 ctxt->instate = XML_PARSER_START_TAG;
11115#ifdef DEBUG_PUSH
11116 xmlGenericError(xmlGenericErrorContext,
11117 "PP: entering START_TAG\n");
11118#endif
11119 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011120 }
11121 }
11122done:
11123#ifdef DEBUG_PUSH
11124 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11125#endif
11126 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011127encoding_error:
11128 {
11129 char buffer[150];
11130
11131 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11132 ctxt->input->cur[0], ctxt->input->cur[1],
11133 ctxt->input->cur[2], ctxt->input->cur[3]);
11134 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11135 "Input is not proper UTF-8, indicate encoding !\n%s",
11136 BAD_CAST buffer, NULL);
11137 }
11138 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011139}
11140
11141/**
Owen Taylor3473f882001-02-23 17:55:21 +000011142 * xmlParseChunk:
11143 * @ctxt: an XML parser context
11144 * @chunk: an char array
11145 * @size: the size in byte of the chunk
11146 * @terminate: last chunk indicator
11147 *
11148 * Parse a Chunk of memory
11149 *
11150 * Returns zero if no error, the xmlParserErrors otherwise.
11151 */
11152int
11153xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11154 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011155 int end_in_lf = 0;
11156
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011157 if (ctxt == NULL)
11158 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011159 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011160 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011161 if (ctxt->instate == XML_PARSER_START)
11162 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011163 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11164 (chunk[size - 1] == '\r')) {
11165 end_in_lf = 1;
11166 size--;
11167 }
Owen Taylor3473f882001-02-23 17:55:21 +000011168 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11169 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11170 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11171 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011172 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011173
William M. Bracka3215c72004-07-31 16:24:01 +000011174 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11175 if (res < 0) {
11176 ctxt->errNo = XML_PARSER_EOF;
11177 ctxt->disableSAX = 1;
11178 return (XML_PARSER_EOF);
11179 }
Owen Taylor3473f882001-02-23 17:55:21 +000011180 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11181 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011182 ctxt->input->end =
11183 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011184#ifdef DEBUG_PUSH
11185 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11186#endif
11187
Owen Taylor3473f882001-02-23 17:55:21 +000011188 } else if (ctxt->instate != XML_PARSER_EOF) {
11189 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11190 xmlParserInputBufferPtr in = ctxt->input->buf;
11191 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11192 (in->raw != NULL)) {
11193 int nbchars;
11194
11195 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11196 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011197 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011198 xmlGenericError(xmlGenericErrorContext,
11199 "xmlParseChunk: encoder error\n");
11200 return(XML_ERR_INVALID_ENCODING);
11201 }
11202 }
11203 }
11204 }
11205 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011206 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11207 (ctxt->input->buf != NULL)) {
11208 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11209 }
Daniel Veillard14412512005-01-21 23:53:26 +000011210 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011211 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011212 if (terminate) {
11213 /*
11214 * Check for termination
11215 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011216 int avail = 0;
11217
11218 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011219 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011220 avail = ctxt->input->length -
11221 (ctxt->input->cur - ctxt->input->base);
11222 else
11223 avail = ctxt->input->buf->buffer->use -
11224 (ctxt->input->cur - ctxt->input->base);
11225 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011226
Owen Taylor3473f882001-02-23 17:55:21 +000011227 if ((ctxt->instate != XML_PARSER_EOF) &&
11228 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011230 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011231 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011232 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011233 }
Owen Taylor3473f882001-02-23 17:55:21 +000011234 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011235 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011236 ctxt->sax->endDocument(ctxt->userData);
11237 }
11238 ctxt->instate = XML_PARSER_EOF;
11239 }
11240 return((xmlParserErrors) ctxt->errNo);
11241}
11242
11243/************************************************************************
11244 * *
11245 * I/O front end functions to the parser *
11246 * *
11247 ************************************************************************/
11248
11249/**
Owen Taylor3473f882001-02-23 17:55:21 +000011250 * xmlCreatePushParserCtxt:
11251 * @sax: a SAX handler
11252 * @user_data: The user data returned on SAX callbacks
11253 * @chunk: a pointer to an array of chars
11254 * @size: number of chars in the array
11255 * @filename: an optional file name or URI
11256 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011257 * Create a parser context for using the XML parser in push mode.
11258 * If @buffer and @size are non-NULL, the data is used to detect
11259 * the encoding. The remaining characters will be parsed so they
11260 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011261 * To allow content encoding detection, @size should be >= 4
11262 * The value of @filename is used for fetching external entities
11263 * and error/warning reports.
11264 *
11265 * Returns the new parser context or NULL
11266 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011267
Owen Taylor3473f882001-02-23 17:55:21 +000011268xmlParserCtxtPtr
11269xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11270 const char *chunk, int size, const char *filename) {
11271 xmlParserCtxtPtr ctxt;
11272 xmlParserInputPtr inputStream;
11273 xmlParserInputBufferPtr buf;
11274 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11275
11276 /*
11277 * plug some encoding conversion routines
11278 */
11279 if ((chunk != NULL) && (size >= 4))
11280 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11281
11282 buf = xmlAllocParserInputBuffer(enc);
11283 if (buf == NULL) return(NULL);
11284
11285 ctxt = xmlNewParserCtxt();
11286 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011287 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011288 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011289 return(NULL);
11290 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011291 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011292 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11293 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011294 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011295 xmlFreeParserInputBuffer(buf);
11296 xmlFreeParserCtxt(ctxt);
11297 return(NULL);
11298 }
Owen Taylor3473f882001-02-23 17:55:21 +000011299 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011300#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011301 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011302#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011303 xmlFree(ctxt->sax);
11304 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11305 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011306 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011307 xmlFreeParserInputBuffer(buf);
11308 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011309 return(NULL);
11310 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011311 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11312 if (sax->initialized == XML_SAX2_MAGIC)
11313 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11314 else
11315 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011316 if (user_data != NULL)
11317 ctxt->userData = user_data;
11318 }
11319 if (filename == NULL) {
11320 ctxt->directory = NULL;
11321 } else {
11322 ctxt->directory = xmlParserGetDirectory(filename);
11323 }
11324
11325 inputStream = xmlNewInputStream(ctxt);
11326 if (inputStream == NULL) {
11327 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011328 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011329 return(NULL);
11330 }
11331
11332 if (filename == NULL)
11333 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011334 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011335 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011336 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011337 if (inputStream->filename == NULL) {
11338 xmlFreeParserCtxt(ctxt);
11339 xmlFreeParserInputBuffer(buf);
11340 return(NULL);
11341 }
11342 }
Owen Taylor3473f882001-02-23 17:55:21 +000011343 inputStream->buf = buf;
11344 inputStream->base = inputStream->buf->buffer->content;
11345 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011346 inputStream->end =
11347 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011348
11349 inputPush(ctxt, inputStream);
11350
William M. Brack3a1cd212005-02-11 14:35:54 +000011351 /*
11352 * If the caller didn't provide an initial 'chunk' for determining
11353 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11354 * that it can be automatically determined later
11355 */
11356 if ((size == 0) || (chunk == NULL)) {
11357 ctxt->charset = XML_CHAR_ENCODING_NONE;
11358 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011359 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11360 int cur = ctxt->input->cur - ctxt->input->base;
11361
Owen Taylor3473f882001-02-23 17:55:21 +000011362 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011363
11364 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11365 ctxt->input->cur = ctxt->input->base + cur;
11366 ctxt->input->end =
11367 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011368#ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11370#endif
11371 }
11372
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011373 if (enc != XML_CHAR_ENCODING_NONE) {
11374 xmlSwitchEncoding(ctxt, enc);
11375 }
11376
Owen Taylor3473f882001-02-23 17:55:21 +000011377 return(ctxt);
11378}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011379#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011380
11381/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011382 * xmlStopParser:
11383 * @ctxt: an XML parser context
11384 *
11385 * Blocks further parser processing
11386 */
11387void
11388xmlStopParser(xmlParserCtxtPtr ctxt) {
11389 if (ctxt == NULL)
11390 return;
11391 ctxt->instate = XML_PARSER_EOF;
11392 ctxt->disableSAX = 1;
11393 if (ctxt->input != NULL) {
11394 ctxt->input->cur = BAD_CAST"";
11395 ctxt->input->base = ctxt->input->cur;
11396 }
11397}
11398
11399/**
Owen Taylor3473f882001-02-23 17:55:21 +000011400 * xmlCreateIOParserCtxt:
11401 * @sax: a SAX handler
11402 * @user_data: The user data returned on SAX callbacks
11403 * @ioread: an I/O read function
11404 * @ioclose: an I/O close function
11405 * @ioctx: an I/O handler
11406 * @enc: the charset encoding if known
11407 *
11408 * Create a parser context for using the XML parser with an existing
11409 * I/O stream
11410 *
11411 * Returns the new parser context or NULL
11412 */
11413xmlParserCtxtPtr
11414xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11415 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11416 void *ioctx, xmlCharEncoding enc) {
11417 xmlParserCtxtPtr ctxt;
11418 xmlParserInputPtr inputStream;
11419 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011420
11421 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011422
11423 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11424 if (buf == NULL) return(NULL);
11425
11426 ctxt = xmlNewParserCtxt();
11427 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011428 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011429 return(NULL);
11430 }
11431 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011432#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011433 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011434#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011435 xmlFree(ctxt->sax);
11436 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11437 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011438 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011439 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011440 return(NULL);
11441 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011442 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11443 if (sax->initialized == XML_SAX2_MAGIC)
11444 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11445 else
11446 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011447 if (user_data != NULL)
11448 ctxt->userData = user_data;
11449 }
11450
11451 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11452 if (inputStream == NULL) {
11453 xmlFreeParserCtxt(ctxt);
11454 return(NULL);
11455 }
11456 inputPush(ctxt, inputStream);
11457
11458 return(ctxt);
11459}
11460
Daniel Veillard4432df22003-09-28 18:58:27 +000011461#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011462/************************************************************************
11463 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011464 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011465 * *
11466 ************************************************************************/
11467
11468/**
11469 * xmlIOParseDTD:
11470 * @sax: the SAX handler block or NULL
11471 * @input: an Input Buffer
11472 * @enc: the charset encoding if known
11473 *
11474 * Load and parse a DTD
11475 *
11476 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011477 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011478 */
11479
11480xmlDtdPtr
11481xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11482 xmlCharEncoding enc) {
11483 xmlDtdPtr ret = NULL;
11484 xmlParserCtxtPtr ctxt;
11485 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011486 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011487
11488 if (input == NULL)
11489 return(NULL);
11490
11491 ctxt = xmlNewParserCtxt();
11492 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011493 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011494 return(NULL);
11495 }
11496
11497 /*
11498 * Set-up the SAX context
11499 */
11500 if (sax != NULL) {
11501 if (ctxt->sax != NULL)
11502 xmlFree(ctxt->sax);
11503 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011504 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011505 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011506 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011507
11508 /*
11509 * generate a parser input from the I/O handler
11510 */
11511
Daniel Veillard43caefb2003-12-07 19:32:22 +000011512 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011513 if (pinput == NULL) {
11514 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011515 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011516 xmlFreeParserCtxt(ctxt);
11517 return(NULL);
11518 }
11519
11520 /*
11521 * plug some encoding conversion routines here.
11522 */
11523 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000011524 if (enc != XML_CHAR_ENCODING_NONE) {
11525 xmlSwitchEncoding(ctxt, enc);
11526 }
Owen Taylor3473f882001-02-23 17:55:21 +000011527
11528 pinput->filename = NULL;
11529 pinput->line = 1;
11530 pinput->col = 1;
11531 pinput->base = ctxt->input->cur;
11532 pinput->cur = ctxt->input->cur;
11533 pinput->free = NULL;
11534
11535 /*
11536 * let's parse that entity knowing it's an external subset.
11537 */
11538 ctxt->inSubset = 2;
11539 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11540 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11541 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011542
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011543 if ((enc == XML_CHAR_ENCODING_NONE) &&
11544 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011545 /*
11546 * Get the 4 first bytes and decode the charset
11547 * if enc != XML_CHAR_ENCODING_NONE
11548 * plug some encoding conversion routines.
11549 */
11550 start[0] = RAW;
11551 start[1] = NXT(1);
11552 start[2] = NXT(2);
11553 start[3] = NXT(3);
11554 enc = xmlDetectCharEncoding(start, 4);
11555 if (enc != XML_CHAR_ENCODING_NONE) {
11556 xmlSwitchEncoding(ctxt, enc);
11557 }
11558 }
11559
Owen Taylor3473f882001-02-23 17:55:21 +000011560 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11561
11562 if (ctxt->myDoc != NULL) {
11563 if (ctxt->wellFormed) {
11564 ret = ctxt->myDoc->extSubset;
11565 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011566 if (ret != NULL) {
11567 xmlNodePtr tmp;
11568
11569 ret->doc = NULL;
11570 tmp = ret->children;
11571 while (tmp != NULL) {
11572 tmp->doc = NULL;
11573 tmp = tmp->next;
11574 }
11575 }
Owen Taylor3473f882001-02-23 17:55:21 +000011576 } else {
11577 ret = NULL;
11578 }
11579 xmlFreeDoc(ctxt->myDoc);
11580 ctxt->myDoc = NULL;
11581 }
11582 if (sax != NULL) ctxt->sax = NULL;
11583 xmlFreeParserCtxt(ctxt);
11584
11585 return(ret);
11586}
11587
11588/**
11589 * xmlSAXParseDTD:
11590 * @sax: the SAX handler block
11591 * @ExternalID: a NAME* containing the External ID of the DTD
11592 * @SystemID: a NAME* containing the URL to the DTD
11593 *
11594 * Load and parse an external subset.
11595 *
11596 * Returns the resulting xmlDtdPtr or NULL in case of error.
11597 */
11598
11599xmlDtdPtr
11600xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11601 const xmlChar *SystemID) {
11602 xmlDtdPtr ret = NULL;
11603 xmlParserCtxtPtr ctxt;
11604 xmlParserInputPtr input = NULL;
11605 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011606 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011607
11608 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11609
11610 ctxt = xmlNewParserCtxt();
11611 if (ctxt == NULL) {
11612 return(NULL);
11613 }
11614
11615 /*
11616 * Set-up the SAX context
11617 */
11618 if (sax != NULL) {
11619 if (ctxt->sax != NULL)
11620 xmlFree(ctxt->sax);
11621 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011622 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011623 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011624
11625 /*
11626 * Canonicalise the system ID
11627 */
11628 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011629 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011630 xmlFreeParserCtxt(ctxt);
11631 return(NULL);
11632 }
Owen Taylor3473f882001-02-23 17:55:21 +000011633
11634 /*
11635 * Ask the Entity resolver to load the damn thing
11636 */
11637
11638 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011639 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11640 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011641 if (input == NULL) {
11642 if (sax != NULL) ctxt->sax = NULL;
11643 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011644 if (systemIdCanonic != NULL)
11645 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011646 return(NULL);
11647 }
11648
11649 /*
11650 * plug some encoding conversion routines here.
11651 */
11652 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011653 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11654 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11655 xmlSwitchEncoding(ctxt, enc);
11656 }
Owen Taylor3473f882001-02-23 17:55:21 +000011657
11658 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011659 input->filename = (char *) systemIdCanonic;
11660 else
11661 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011662 input->line = 1;
11663 input->col = 1;
11664 input->base = ctxt->input->cur;
11665 input->cur = ctxt->input->cur;
11666 input->free = NULL;
11667
11668 /*
11669 * let's parse that entity knowing it's an external subset.
11670 */
11671 ctxt->inSubset = 2;
11672 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11673 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11674 ExternalID, SystemID);
11675 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11676
11677 if (ctxt->myDoc != NULL) {
11678 if (ctxt->wellFormed) {
11679 ret = ctxt->myDoc->extSubset;
11680 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011681 if (ret != NULL) {
11682 xmlNodePtr tmp;
11683
11684 ret->doc = NULL;
11685 tmp = ret->children;
11686 while (tmp != NULL) {
11687 tmp->doc = NULL;
11688 tmp = tmp->next;
11689 }
11690 }
Owen Taylor3473f882001-02-23 17:55:21 +000011691 } else {
11692 ret = NULL;
11693 }
11694 xmlFreeDoc(ctxt->myDoc);
11695 ctxt->myDoc = NULL;
11696 }
11697 if (sax != NULL) ctxt->sax = NULL;
11698 xmlFreeParserCtxt(ctxt);
11699
11700 return(ret);
11701}
11702
Daniel Veillard4432df22003-09-28 18:58:27 +000011703
Owen Taylor3473f882001-02-23 17:55:21 +000011704/**
11705 * xmlParseDTD:
11706 * @ExternalID: a NAME* containing the External ID of the DTD
11707 * @SystemID: a NAME* containing the URL to the DTD
11708 *
11709 * Load and parse an external subset.
11710 *
11711 * Returns the resulting xmlDtdPtr or NULL in case of error.
11712 */
11713
11714xmlDtdPtr
11715xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11716 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11717}
Daniel Veillard4432df22003-09-28 18:58:27 +000011718#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011719
11720/************************************************************************
11721 * *
11722 * Front ends when parsing an Entity *
11723 * *
11724 ************************************************************************/
11725
11726/**
Owen Taylor3473f882001-02-23 17:55:21 +000011727 * xmlParseCtxtExternalEntity:
11728 * @ctx: the existing parsing context
11729 * @URL: the URL for the entity to load
11730 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011731 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011732 *
11733 * Parse an external general entity within an existing parsing context
11734 * An external general parsed entity is well-formed if it matches the
11735 * production labeled extParsedEnt.
11736 *
11737 * [78] extParsedEnt ::= TextDecl? content
11738 *
11739 * Returns 0 if the entity is well formed, -1 in case of args problem and
11740 * the parser error code otherwise
11741 */
11742
11743int
11744xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011745 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011746 xmlParserCtxtPtr ctxt;
11747 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011748 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011749 xmlSAXHandlerPtr oldsax = NULL;
11750 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011751 xmlChar start[4];
11752 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011753 xmlParserInputPtr inputStream;
11754 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011755
Daniel Veillardce682bc2004-11-05 17:22:25 +000011756 if (ctx == NULL) return(-1);
11757
Owen Taylor3473f882001-02-23 17:55:21 +000011758 if (ctx->depth > 40) {
11759 return(XML_ERR_ENTITY_LOOP);
11760 }
11761
Daniel Veillardcda96922001-08-21 10:56:31 +000011762 if (lst != NULL)
11763 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011764 if ((URL == NULL) && (ID == NULL))
11765 return(-1);
11766 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11767 return(-1);
11768
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011769 ctxt = xmlNewParserCtxt();
11770 if (ctxt == NULL) {
11771 return(-1);
11772 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011773
Owen Taylor3473f882001-02-23 17:55:21 +000011774 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011775 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011776
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011777 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11778 if (inputStream == NULL) {
11779 xmlFreeParserCtxt(ctxt);
11780 return(-1);
11781 }
11782
11783 inputPush(ctxt, inputStream);
11784
11785 if ((ctxt->directory == NULL) && (directory == NULL))
11786 directory = xmlParserGetDirectory((char *)URL);
11787 if ((ctxt->directory == NULL) && (directory != NULL))
11788 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011789
Owen Taylor3473f882001-02-23 17:55:21 +000011790 oldsax = ctxt->sax;
11791 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011792 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011793 newDoc = xmlNewDoc(BAD_CAST "1.0");
11794 if (newDoc == NULL) {
11795 xmlFreeParserCtxt(ctxt);
11796 return(-1);
11797 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011798 if (ctx->myDoc->dict) {
11799 newDoc->dict = ctx->myDoc->dict;
11800 xmlDictReference(newDoc->dict);
11801 }
Owen Taylor3473f882001-02-23 17:55:21 +000011802 if (ctx->myDoc != NULL) {
11803 newDoc->intSubset = ctx->myDoc->intSubset;
11804 newDoc->extSubset = ctx->myDoc->extSubset;
11805 }
11806 if (ctx->myDoc->URL != NULL) {
11807 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11808 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011809 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11810 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011811 ctxt->sax = oldsax;
11812 xmlFreeParserCtxt(ctxt);
11813 newDoc->intSubset = NULL;
11814 newDoc->extSubset = NULL;
11815 xmlFreeDoc(newDoc);
11816 return(-1);
11817 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011818 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011819 nodePush(ctxt, newDoc->children);
11820 if (ctx->myDoc == NULL) {
11821 ctxt->myDoc = newDoc;
11822 } else {
11823 ctxt->myDoc = ctx->myDoc;
11824 newDoc->children->doc = ctx->myDoc;
11825 }
11826
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011827 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000011828 * Get the 4 first bytes and decode the charset
11829 * if enc != XML_CHAR_ENCODING_NONE
11830 * plug some encoding conversion routines.
11831 */
11832 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011833 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11834 start[0] = RAW;
11835 start[1] = NXT(1);
11836 start[2] = NXT(2);
11837 start[3] = NXT(3);
11838 enc = xmlDetectCharEncoding(start, 4);
11839 if (enc != XML_CHAR_ENCODING_NONE) {
11840 xmlSwitchEncoding(ctxt, enc);
11841 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011842 }
11843
Owen Taylor3473f882001-02-23 17:55:21 +000011844 /*
11845 * Parse a possible text declaration first
11846 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011847 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011848 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011849 /*
11850 * An XML-1.0 document can't reference an entity not XML-1.0
11851 */
11852 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
11853 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11854 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11855 "Version mismatch between document and entity\n");
11856 }
Owen Taylor3473f882001-02-23 17:55:21 +000011857 }
11858
11859 /*
11860 * Doing validity checking on chunk doesn't make sense
11861 */
11862 ctxt->instate = XML_PARSER_CONTENT;
11863 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011864 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011865 ctxt->loadsubset = ctx->loadsubset;
11866 ctxt->depth = ctx->depth + 1;
11867 ctxt->replaceEntities = ctx->replaceEntities;
11868 if (ctxt->validate) {
11869 ctxt->vctxt.error = ctx->vctxt.error;
11870 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011871 } else {
11872 ctxt->vctxt.error = NULL;
11873 ctxt->vctxt.warning = NULL;
11874 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011875 ctxt->vctxt.nodeTab = NULL;
11876 ctxt->vctxt.nodeNr = 0;
11877 ctxt->vctxt.nodeMax = 0;
11878 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011879 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11880 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011881 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11882 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11883 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011884 ctxt->dictNames = ctx->dictNames;
11885 ctxt->attsDefault = ctx->attsDefault;
11886 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011887 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011888
11889 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011890
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011891 ctx->validate = ctxt->validate;
11892 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011893 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011894 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011895 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011896 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011897 }
11898 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011899 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011900 }
11901
11902 if (!ctxt->wellFormed) {
11903 if (ctxt->errNo == 0)
11904 ret = 1;
11905 else
11906 ret = ctxt->errNo;
11907 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011908 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011909 xmlNodePtr cur;
11910
11911 /*
11912 * Return the newly created nodeset after unlinking it from
11913 * they pseudo parent.
11914 */
11915 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011916 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011917 while (cur != NULL) {
11918 cur->parent = NULL;
11919 cur = cur->next;
11920 }
11921 newDoc->children->children = NULL;
11922 }
11923 ret = 0;
11924 }
11925 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011926 ctxt->dict = NULL;
11927 ctxt->attsDefault = NULL;
11928 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011929 xmlFreeParserCtxt(ctxt);
11930 newDoc->intSubset = NULL;
11931 newDoc->extSubset = NULL;
11932 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011933
Owen Taylor3473f882001-02-23 17:55:21 +000011934 return(ret);
11935}
11936
11937/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011938 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011939 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011940 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011941 * @sax: the SAX handler bloc (possibly NULL)
11942 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11943 * @depth: Used for loop detection, use 0
11944 * @URL: the URL for the entity to load
11945 * @ID: the System ID for the entity to load
11946 * @list: the return value for the set of parsed nodes
11947 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011948 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011949 *
11950 * Returns 0 if the entity is well formed, -1 in case of args problem and
11951 * the parser error code otherwise
11952 */
11953
Daniel Veillard7d515752003-09-26 19:12:37 +000011954static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011955xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11956 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011957 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011958 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011959 xmlParserCtxtPtr ctxt;
11960 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011961 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011962 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011963 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011964 xmlChar start[4];
11965 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011966
11967 if (depth > 40) {
11968 return(XML_ERR_ENTITY_LOOP);
11969 }
11970
11971
11972
11973 if (list != NULL)
11974 *list = NULL;
11975 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011976 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011977 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011978 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011979
11980
11981 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011982 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011983 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011984 if (oldctxt != NULL) {
11985 ctxt->_private = oldctxt->_private;
11986 ctxt->loadsubset = oldctxt->loadsubset;
11987 ctxt->validate = oldctxt->validate;
11988 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011989 ctxt->record_info = oldctxt->record_info;
11990 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11991 ctxt->node_seq.length = oldctxt->node_seq.length;
11992 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011993 } else {
11994 /*
11995 * Doing validity checking on chunk without context
11996 * doesn't make sense
11997 */
11998 ctxt->_private = NULL;
11999 ctxt->validate = 0;
12000 ctxt->external = 2;
12001 ctxt->loadsubset = 0;
12002 }
Owen Taylor3473f882001-02-23 17:55:21 +000012003 if (sax != NULL) {
12004 oldsax = ctxt->sax;
12005 ctxt->sax = sax;
12006 if (user_data != NULL)
12007 ctxt->userData = user_data;
12008 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012009 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012010 newDoc = xmlNewDoc(BAD_CAST "1.0");
12011 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012012 ctxt->node_seq.maximum = 0;
12013 ctxt->node_seq.length = 0;
12014 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012015 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012016 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012017 }
Daniel Veillard30e76072006-03-09 14:13:55 +000012018 newDoc->intSubset = doc->intSubset;
12019 newDoc->extSubset = doc->extSubset;
12020 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012021 xmlDictReference(newDoc->dict);
12022
Owen Taylor3473f882001-02-23 17:55:21 +000012023 if (doc->URL != NULL) {
12024 newDoc->URL = xmlStrdup(doc->URL);
12025 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012026 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12027 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012028 if (sax != NULL)
12029 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012030 ctxt->node_seq.maximum = 0;
12031 ctxt->node_seq.length = 0;
12032 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012033 xmlFreeParserCtxt(ctxt);
12034 newDoc->intSubset = NULL;
12035 newDoc->extSubset = NULL;
12036 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012037 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012038 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012039 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012040 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012041 ctxt->myDoc = doc;
12042 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012043
Daniel Veillard87a764e2001-06-20 17:41:10 +000012044 /*
12045 * Get the 4 first bytes and decode the charset
12046 * if enc != XML_CHAR_ENCODING_NONE
12047 * plug some encoding conversion routines.
12048 */
12049 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012050 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12051 start[0] = RAW;
12052 start[1] = NXT(1);
12053 start[2] = NXT(2);
12054 start[3] = NXT(3);
12055 enc = xmlDetectCharEncoding(start, 4);
12056 if (enc != XML_CHAR_ENCODING_NONE) {
12057 xmlSwitchEncoding(ctxt, enc);
12058 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012059 }
12060
Owen Taylor3473f882001-02-23 17:55:21 +000012061 /*
12062 * Parse a possible text declaration first
12063 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012064 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012065 xmlParseTextDecl(ctxt);
12066 }
12067
Owen Taylor3473f882001-02-23 17:55:21 +000012068 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012069 ctxt->depth = depth;
12070
12071 xmlParseContent(ctxt);
12072
Daniel Veillard561b7f82002-03-20 21:55:57 +000012073 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012074 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012075 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012076 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012077 }
12078 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012079 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012080 }
12081
12082 if (!ctxt->wellFormed) {
12083 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012084 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012085 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012086 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012087 } else {
12088 if (list != NULL) {
12089 xmlNodePtr cur;
12090
12091 /*
12092 * Return the newly created nodeset after unlinking it from
12093 * they pseudo parent.
12094 */
12095 cur = newDoc->children->children;
12096 *list = cur;
12097 while (cur != NULL) {
12098 cur->parent = NULL;
12099 cur = cur->next;
12100 }
12101 newDoc->children->children = NULL;
12102 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012103 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012104 }
12105 if (sax != NULL)
12106 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012107 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12108 oldctxt->node_seq.length = ctxt->node_seq.length;
12109 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012110 ctxt->node_seq.maximum = 0;
12111 ctxt->node_seq.length = 0;
12112 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012113 xmlFreeParserCtxt(ctxt);
12114 newDoc->intSubset = NULL;
12115 newDoc->extSubset = NULL;
12116 xmlFreeDoc(newDoc);
12117
12118 return(ret);
12119}
12120
Daniel Veillard81273902003-09-30 00:43:48 +000012121#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012122/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012123 * xmlParseExternalEntity:
12124 * @doc: the document the chunk pertains to
12125 * @sax: the SAX handler bloc (possibly NULL)
12126 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12127 * @depth: Used for loop detection, use 0
12128 * @URL: the URL for the entity to load
12129 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012130 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012131 *
12132 * Parse an external general entity
12133 * An external general parsed entity is well-formed if it matches the
12134 * production labeled extParsedEnt.
12135 *
12136 * [78] extParsedEnt ::= TextDecl? content
12137 *
12138 * Returns 0 if the entity is well formed, -1 in case of args problem and
12139 * the parser error code otherwise
12140 */
12141
12142int
12143xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012144 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012145 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012146 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012147}
12148
12149/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012150 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012151 * @doc: the document the chunk pertains to
12152 * @sax: the SAX handler bloc (possibly NULL)
12153 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12154 * @depth: Used for loop detection, use 0
12155 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012156 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012157 *
12158 * Parse a well-balanced chunk of an XML document
12159 * called by the parser
12160 * The allowed sequence for the Well Balanced Chunk is the one defined by
12161 * the content production in the XML grammar:
12162 *
12163 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12164 *
12165 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12166 * the parser error code otherwise
12167 */
12168
12169int
12170xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012171 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012172 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12173 depth, string, lst, 0 );
12174}
Daniel Veillard81273902003-09-30 00:43:48 +000012175#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012176
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * The chunk is parsed with a new context which borrows the dictionary,
 * the SAX handler, the options and the attribute defaulting tables of
 * @oldctxt. The nodes are built under a temporary "pseudoroot" element
 * hooked on the document of @oldctxt (or on a throwaway document when
 * @oldctxt has none); the original children of the document are put back
 * before returning.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise. XML_ERR_ENTITY_LOOP is returned when the depth
 * of @oldctxt is greater than 40, XML_ERR_INTERNAL_ERROR when @string is
 * NULL or an allocation of the temporary tree fails, and
 * XML_WAR_UNDECLARED_ENTITY when the memory parser context cannot be
 * created.
 *
 * This function has no recover mode: the nodelist is returned in @lst
 * only when the parsed chunk is well balanced.
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;

    /* guard against runaway recursion through nested entities */
    if (oldctxt->depth > 40) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /*
     * Replace the dictionary of the new context by the one of the old
     * context. No reference is taken here, which is why ctxt->dict is
     * reset to NULL on every path before the context is freed.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    /* the cached well-known strings must come from the dictionary in use */
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

    /* borrow the SAX handler too; the original one is restored on exit */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document yet: build the nodes in a temporary one */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Reuse the document being built and remember its current
         * children so they can be put back once the chunk is parsed.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* temporarily make the pseudoroot the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    /* validation is run from the old context on the resulting nodes */
    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    ctxt->dictNames = oldctxt->dictNames;
    /* borrowed tables, detached again before the context is freed */
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    /* anything left in the input means the chunk was not well balanced */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back at the pseudoroot */
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            /* validate each top-level element against the old context */
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /*
         * Drop the pseudoroot (together with any children still attached
         * to it) and restore the children the document had on entry.
         */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Give back everything borrowed from the old context before freeing
     * the temporary one.
     */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}
12345
/**
 * xmlParseInNodeContext:
 * @node:  the context node
 * @data:  the input string
 * @datalen:  the input string length in bytes
 * @options:  a combination of xmlParserOption
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document
 * within the context (DTD, namespaces, etc ...) of the given node.
 *
 * The allowed sequence for the data is a Well Balanced Chunk defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * When @node is not an element or a document node, its closest element or
 * document ancestor is used as the context. The new nodes are created as
 * the last children of that context node, after a temporary marker comment,
 * and are unlinked from it again before being returned in @lst.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 * error code otherwise. XML_ERR_INTERNAL_ERROR is returned for invalid
 * arguments, for an unsupported node or document type, and when the code
 * is compiled without SAX2; XML_ERR_NO_MEMORY is returned when the parser
 * context or the marker node cannot be allocated. On error *@lst is NULL
 * once parsing has been attempted.
 */
xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
                      int options, xmlNodePtr *lst) {
#ifdef SAX2
    xmlParserCtxtPtr ctxt;
    xmlDocPtr doc = NULL;
    xmlNodePtr fake, cur;
    int nsnr = 0;  /* number of namespaces pushed on the context stack */

    xmlParserErrors ret = XML_ERR_OK;

    /*
     * check all input parameters, grab the document
     */
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
        return(XML_ERR_INTERNAL_ERROR);
    switch (node->type) {
        case XML_ELEMENT_NODE:
        case XML_ATTRIBUTE_NODE:
        case XML_TEXT_NODE:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case XML_DOCUMENT_NODE:
        case XML_HTML_DOCUMENT_NODE:
            break;
        default:
            return(XML_ERR_INTERNAL_ERROR);

    }
    /* climb up to the closest element or document node */
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
           (node->type != XML_DOCUMENT_NODE) &&
           (node->type != XML_HTML_DOCUMENT_NODE))
        node = node->parent;
    if (node == NULL)
        return(XML_ERR_INTERNAL_ERROR);
    if (node->type == XML_ELEMENT_NODE)
        doc = node->doc;
    else
        doc = (xmlDocPtr) node;
    if (doc == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    /*
     * allocate a context and set-up everything not related to the
     * node position in the tree
     */
    if (doc->type == XML_DOCUMENT_NODE)
        ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
#ifdef LIBXML_HTML_ENABLED
    else if (doc->type == XML_HTML_DOCUMENT_NODE)
        ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
#endif
    else
        return(XML_ERR_INTERNAL_ERROR);

    if (ctxt == NULL)
        return(XML_ERR_NO_MEMORY);
    /*
     * The empty comment is a marker appended as last child of the context
     * node: whatever follows it after parsing is the newly built content.
     */
    fake = xmlNewComment(NULL);
    if (fake == NULL) {
        xmlFreeParserCtxt(ctxt);
        return(XML_ERR_NO_MEMORY);
    }
    xmlAddChild(node, fake);

    /*
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
     * we must wait until the last moment to free the original one.
     */
    if (doc->dict != NULL) {
        if (ctxt->dict != NULL)
            xmlDictFree(ctxt->dict);
        ctxt->dict = doc->dict;
    } else
        options |= XML_PARSE_NODICT;

    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
    xmlDetectSAX2(ctxt);
    ctxt->myDoc = doc;

    if (node->type == XML_ELEMENT_NODE) {
        nodePush(ctxt, node);
        /*
         * initialize the SAX2 namespaces stack
         */
        cur = node;
        /*
         * Walk from the context node up through its element ancestors;
         * a prefix already found on the stack is not pushed again, so the
         * innermost declaration wins.
         */
        while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
            xmlNsPtr ns = cur->nsDef;
            const xmlChar *iprefix, *ihref;

            while (ns != NULL) {
                if (ctxt->dict) {
                    iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
                    ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
                } else {
                    iprefix = ns->prefix;
                    ihref = ns->href;
                }

                if (xmlGetNamespace(ctxt, iprefix) == NULL) {
                    nsPush(ctxt, iprefix, ihref);
                    nsnr++;
                }
                ns = ns->next;
            }
            cur = cur->parent;
        }
        ctxt->instate = XML_PARSER_CONTENT;
    }

    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }

#ifdef LIBXML_HTML_ENABLED
    if (doc->type == XML_HTML_DOCUMENT_NODE)
        __htmlParseContent(ctxt);
    else
#endif
        xmlParseContent(ctxt);

    /* remove the namespaces pushed above */
    nsPop(ctxt, nsnr);
    /* anything left in the input means the chunk was not well balanced */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back at the context node */
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
        ctxt->wellFormed = 0;
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
        ret = XML_ERR_OK;
    }

    /*
     * Return the newly created nodeset after unlinking it from
     * the pseudo sibling.
     */

    cur = fake->next;
    fake->next = NULL;
    node->last = fake;

    if (cur != NULL) {
        cur->prev = NULL;
    }

    *lst = cur;

    while (cur != NULL) {
        cur->parent = NULL;
        cur = cur->next;
    }

    /* the marker is no longer needed */
    xmlUnlinkNode(fake);
    xmlFreeNode(fake);


    /* on error nothing is handed back: release the partial result */
    if (ret != XML_ERR_OK) {
        xmlFreeNodeList(*lst);
        *lst = NULL;
    }

    /*
     * The document dictionary was installed without taking a reference,
     * so detach it before the context is freed.
     */
    if (doc->dict != NULL)
        ctxt->dict = NULL;
    xmlFreeParserCtxt(ctxt);

    return(ret);
#else /* !SAX2 */
    return(XML_ERR_INTERNAL_ERROR);
#endif
}
12549
Daniel Veillard81273902003-09-30 00:43:48 +000012550#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012551/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012552 * xmlParseBalancedChunkMemoryRecover:
12553 * @doc: the document the chunk pertains to
12554 * @sax: the SAX handler bloc (possibly NULL)
12555 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12556 * @depth: Used for loop detection, use 0
12557 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12558 * @lst: the return value for the set of parsed nodes
12559 * @recover: return nodes even if the data is broken (use 0)
12560 *
12561 *
12562 * Parse a well-balanced chunk of an XML document
12563 * called by the parser
12564 * The allowed sequence for the Well Balanced Chunk is the one defined by
12565 * the content production in the XML grammar:
12566 *
12567 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12568 *
12569 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12570 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012571 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012572 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012573 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12574 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012575 */
12576int
12577xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012578 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012579 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012580 xmlParserCtxtPtr ctxt;
12581 xmlDocPtr newDoc;
12582 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012583 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012584 int size;
12585 int ret = 0;
12586
12587 if (depth > 40) {
12588 return(XML_ERR_ENTITY_LOOP);
12589 }
12590
12591
Daniel Veillardcda96922001-08-21 10:56:31 +000012592 if (lst != NULL)
12593 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012594 if (string == NULL)
12595 return(-1);
12596
12597 size = xmlStrlen(string);
12598
12599 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12600 if (ctxt == NULL) return(-1);
12601 ctxt->userData = ctxt;
12602 if (sax != NULL) {
12603 oldsax = ctxt->sax;
12604 ctxt->sax = sax;
12605 if (user_data != NULL)
12606 ctxt->userData = user_data;
12607 }
12608 newDoc = xmlNewDoc(BAD_CAST "1.0");
12609 if (newDoc == NULL) {
12610 xmlFreeParserCtxt(ctxt);
12611 return(-1);
12612 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012613 if ((doc != NULL) && (doc->dict != NULL)) {
12614 xmlDictFree(ctxt->dict);
12615 ctxt->dict = doc->dict;
12616 xmlDictReference(ctxt->dict);
12617 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12618 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12619 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12620 ctxt->dictNames = 1;
12621 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012622 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012623 }
Owen Taylor3473f882001-02-23 17:55:21 +000012624 if (doc != NULL) {
12625 newDoc->intSubset = doc->intSubset;
12626 newDoc->extSubset = doc->extSubset;
12627 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012628 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12629 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012630 if (sax != NULL)
12631 ctxt->sax = oldsax;
12632 xmlFreeParserCtxt(ctxt);
12633 newDoc->intSubset = NULL;
12634 newDoc->extSubset = NULL;
12635 xmlFreeDoc(newDoc);
12636 return(-1);
12637 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012638 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12639 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012640 if (doc == NULL) {
12641 ctxt->myDoc = newDoc;
12642 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012643 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012644 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012645 /* Ensure that doc has XML spec namespace */
12646 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12647 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012648 }
12649 ctxt->instate = XML_PARSER_CONTENT;
12650 ctxt->depth = depth;
12651
12652 /*
12653 * Doing validity checking on chunk doesn't make sense
12654 */
12655 ctxt->validate = 0;
12656 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012657 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012658
Daniel Veillardb39bc392002-10-26 19:29:51 +000012659 if ( doc != NULL ){
12660 content = doc->children;
12661 doc->children = NULL;
12662 xmlParseContent(ctxt);
12663 doc->children = content;
12664 }
12665 else {
12666 xmlParseContent(ctxt);
12667 }
Owen Taylor3473f882001-02-23 17:55:21 +000012668 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012669 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012670 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012671 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012672 }
12673 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012674 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012675 }
12676
12677 if (!ctxt->wellFormed) {
12678 if (ctxt->errNo == 0)
12679 ret = 1;
12680 else
12681 ret = ctxt->errNo;
12682 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012683 ret = 0;
12684 }
12685
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012686 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12687 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012688
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012689 /*
12690 * Return the newly created nodeset after unlinking it from
12691 * they pseudo parent.
12692 */
12693 cur = newDoc->children->children;
12694 *lst = cur;
12695 while (cur != NULL) {
12696 xmlSetTreeDoc(cur, doc);
12697 cur->parent = NULL;
12698 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012699 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012700 newDoc->children->children = NULL;
12701 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012702
Owen Taylor3473f882001-02-23 17:55:21 +000012703 if (sax != NULL)
12704 ctxt->sax = oldsax;
12705 xmlFreeParserCtxt(ctxt);
12706 newDoc->intSubset = NULL;
12707 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012708 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012709 xmlFreeDoc(newDoc);
12710
12711 return(ret);
12712}
12713
12714/**
12715 * xmlSAXParseEntity:
12716 * @sax: the SAX handler block
12717 * @filename: the filename
12718 *
12719 * parse an XML external entity out of context and build a tree.
12720 * It use the given SAX function block to handle the parsing callback.
12721 * If sax is NULL, fallback to the default DOM tree building routines.
12722 *
12723 * [78] extParsedEnt ::= TextDecl? content
12724 *
12725 * This correspond to a "Well Balanced" chunk
12726 *
12727 * Returns the resulting document tree
12728 */
12729
12730xmlDocPtr
12731xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12732 xmlDocPtr ret;
12733 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012734
12735 ctxt = xmlCreateFileParserCtxt(filename);
12736 if (ctxt == NULL) {
12737 return(NULL);
12738 }
12739 if (sax != NULL) {
12740 if (ctxt->sax != NULL)
12741 xmlFree(ctxt->sax);
12742 ctxt->sax = sax;
12743 ctxt->userData = NULL;
12744 }
12745
Owen Taylor3473f882001-02-23 17:55:21 +000012746 xmlParseExtParsedEnt(ctxt);
12747
12748 if (ctxt->wellFormed)
12749 ret = ctxt->myDoc;
12750 else {
12751 ret = NULL;
12752 xmlFreeDoc(ctxt->myDoc);
12753 ctxt->myDoc = NULL;
12754 }
12755 if (sax != NULL)
12756 ctxt->sax = NULL;
12757 xmlFreeParserCtxt(ctxt);
12758
12759 return(ret);
12760}
12761
12762/**
12763 * xmlParseEntity:
12764 * @filename: the filename
12765 *
12766 * parse an XML external entity out of context and build a tree.
12767 *
12768 * [78] extParsedEnt ::= TextDecl? content
12769 *
12770 * This correspond to a "Well Balanced" chunk
12771 *
12772 * Returns the resulting document tree
12773 */
12774
12775xmlDocPtr
12776xmlParseEntity(const char *filename) {
12777 return(xmlSAXParseEntity(NULL, filename));
12778}
Daniel Veillard81273902003-09-30 00:43:48 +000012779#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012780
12781/**
12782 * xmlCreateEntityParserCtxt:
12783 * @URL: the entity URL
12784 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012785 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012786 *
12787 * Create a parser context for an external entity
12788 * Automatic support for ZLIB/Compress compressed document is provided
12789 * by default if found at compile-time.
12790 *
12791 * Returns the new parser context or NULL
12792 */
12793xmlParserCtxtPtr
12794xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12795 const xmlChar *base) {
12796 xmlParserCtxtPtr ctxt;
12797 xmlParserInputPtr inputStream;
12798 char *directory = NULL;
12799 xmlChar *uri;
12800
12801 ctxt = xmlNewParserCtxt();
12802 if (ctxt == NULL) {
12803 return(NULL);
12804 }
12805
12806 uri = xmlBuildURI(URL, base);
12807
12808 if (uri == NULL) {
12809 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12810 if (inputStream == NULL) {
12811 xmlFreeParserCtxt(ctxt);
12812 return(NULL);
12813 }
12814
12815 inputPush(ctxt, inputStream);
12816
12817 if ((ctxt->directory == NULL) && (directory == NULL))
12818 directory = xmlParserGetDirectory((char *)URL);
12819 if ((ctxt->directory == NULL) && (directory != NULL))
12820 ctxt->directory = directory;
12821 } else {
12822 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12823 if (inputStream == NULL) {
12824 xmlFree(uri);
12825 xmlFreeParserCtxt(ctxt);
12826 return(NULL);
12827 }
12828
12829 inputPush(ctxt, inputStream);
12830
12831 if ((ctxt->directory == NULL) && (directory == NULL))
12832 directory = xmlParserGetDirectory((char *)uri);
12833 if ((ctxt->directory == NULL) && (directory != NULL))
12834 ctxt->directory = directory;
12835 xmlFree(uri);
12836 }
Owen Taylor3473f882001-02-23 17:55:21 +000012837 return(ctxt);
12838}
12839
12840/************************************************************************
12841 * *
12842 * Front ends when parsing from a file *
12843 * *
12844 ************************************************************************/
12845
12846/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012847 * xmlCreateURLParserCtxt:
12848 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012849 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012850 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012851 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012852 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012853 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012854 *
12855 * Returns the new parser context or NULL
12856 */
12857xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012858xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012859{
12860 xmlParserCtxtPtr ctxt;
12861 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012862 char *directory = NULL;
12863
Owen Taylor3473f882001-02-23 17:55:21 +000012864 ctxt = xmlNewParserCtxt();
12865 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012866 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012867 return(NULL);
12868 }
12869
Daniel Veillarddf292f72005-01-16 19:00:15 +000012870 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000012871 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000012872 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000012873
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012874 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012875 if (inputStream == NULL) {
12876 xmlFreeParserCtxt(ctxt);
12877 return(NULL);
12878 }
12879
Owen Taylor3473f882001-02-23 17:55:21 +000012880 inputPush(ctxt, inputStream);
12881 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012882 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012883 if ((ctxt->directory == NULL) && (directory != NULL))
12884 ctxt->directory = directory;
12885
12886 return(ctxt);
12887}
12888
Daniel Veillard61b93382003-11-03 14:28:31 +000012889/**
12890 * xmlCreateFileParserCtxt:
12891 * @filename: the filename
12892 *
12893 * Create a parser context for a file content.
12894 * Automatic support for ZLIB/Compress compressed document is provided
12895 * by default if found at compile-time.
12896 *
12897 * Returns the new parser context or NULL
12898 */
12899xmlParserCtxtPtr
12900xmlCreateFileParserCtxt(const char *filename)
12901{
12902 return(xmlCreateURLParserCtxt(filename, 0));
12903}
12904
Daniel Veillard81273902003-09-30 00:43:48 +000012905#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012906/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012907 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012908 * @sax: the SAX handler block
12909 * @filename: the filename
12910 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12911 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012912 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012913 *
12914 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12915 * compressed document is provided by default if found at compile-time.
12916 * It use the given SAX function block to handle the parsing callback.
12917 * If sax is NULL, fallback to the default DOM tree building routines.
12918 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012919 * User data (void *) is stored within the parser context in the
12920 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012921 *
Owen Taylor3473f882001-02-23 17:55:21 +000012922 * Returns the resulting document tree
12923 */
12924
12925xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012926xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12927 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012928 xmlDocPtr ret;
12929 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012930
Daniel Veillard635ef722001-10-29 11:48:19 +000012931 xmlInitParser();
12932
Owen Taylor3473f882001-02-23 17:55:21 +000012933 ctxt = xmlCreateFileParserCtxt(filename);
12934 if (ctxt == NULL) {
12935 return(NULL);
12936 }
12937 if (sax != NULL) {
12938 if (ctxt->sax != NULL)
12939 xmlFree(ctxt->sax);
12940 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012941 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012942 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012943 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012944 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012945 }
Owen Taylor3473f882001-02-23 17:55:21 +000012946
Daniel Veillard37d2d162008-03-14 10:54:00 +000012947 if (ctxt->directory == NULL)
12948 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012949
Daniel Veillarddad3f682002-11-17 16:47:27 +000012950 ctxt->recovery = recovery;
12951
Owen Taylor3473f882001-02-23 17:55:21 +000012952 xmlParseDocument(ctxt);
12953
William M. Brackc07329e2003-09-08 01:57:30 +000012954 if ((ctxt->wellFormed) || recovery) {
12955 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012956 if (ret != NULL) {
12957 if (ctxt->input->buf->compressed > 0)
12958 ret->compression = 9;
12959 else
12960 ret->compression = ctxt->input->buf->compressed;
12961 }
William M. Brackc07329e2003-09-08 01:57:30 +000012962 }
Owen Taylor3473f882001-02-23 17:55:21 +000012963 else {
12964 ret = NULL;
12965 xmlFreeDoc(ctxt->myDoc);
12966 ctxt->myDoc = NULL;
12967 }
12968 if (sax != NULL)
12969 ctxt->sax = NULL;
12970 xmlFreeParserCtxt(ctxt);
12971
12972 return(ret);
12973}
12974
12975/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012976 * xmlSAXParseFile:
12977 * @sax: the SAX handler block
12978 * @filename: the filename
12979 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12980 * documents
12981 *
12982 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12983 * compressed document is provided by default if found at compile-time.
12984 * It use the given SAX function block to handle the parsing callback.
12985 * If sax is NULL, fallback to the default DOM tree building routines.
12986 *
12987 * Returns the resulting document tree
12988 */
12989
12990xmlDocPtr
12991xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12992 int recovery) {
12993 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12994}
12995
12996/**
Owen Taylor3473f882001-02-23 17:55:21 +000012997 * xmlRecoverDoc:
12998 * @cur: a pointer to an array of xmlChar
12999 *
13000 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013001 * In the case the document is not Well Formed, a attempt to build a
13002 * tree is tried anyway
13003 *
13004 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013005 */
13006
13007xmlDocPtr
13008xmlRecoverDoc(xmlChar *cur) {
13009 return(xmlSAXParseDoc(NULL, cur, 1));
13010}
13011
13012/**
13013 * xmlParseFile:
13014 * @filename: the filename
13015 *
13016 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13017 * compressed document is provided by default if found at compile-time.
13018 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013019 * Returns the resulting document tree if the file was wellformed,
13020 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013021 */
13022
13023xmlDocPtr
13024xmlParseFile(const char *filename) {
13025 return(xmlSAXParseFile(NULL, filename, 0));
13026}
13027
13028/**
13029 * xmlRecoverFile:
13030 * @filename: the filename
13031 *
13032 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13033 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013034 * In the case the document is not Well Formed, it attempts to build
13035 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013036 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013037 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013038 */
13039
13040xmlDocPtr
13041xmlRecoverFile(const char *filename) {
13042 return(xmlSAXParseFile(NULL, filename, 1));
13043}
13044
13045
13046/**
13047 * xmlSetupParserForBuffer:
13048 * @ctxt: an XML parser context
13049 * @buffer: a xmlChar * buffer
13050 * @filename: a file name
13051 *
13052 * Setup the parser context to parse a new buffer; Clears any prior
13053 * contents from the parser context. The buffer parameter must not be
13054 * NULL, but the filename parameter can be
13055 */
13056void
13057xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13058 const char* filename)
13059{
13060 xmlParserInputPtr input;
13061
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013062 if ((ctxt == NULL) || (buffer == NULL))
13063 return;
13064
Owen Taylor3473f882001-02-23 17:55:21 +000013065 input = xmlNewInputStream(ctxt);
13066 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013067 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013068 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013069 return;
13070 }
13071
13072 xmlClearParserCtxt(ctxt);
13073 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013074 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013075 input->base = buffer;
13076 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013077 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013078 inputPush(ctxt, input);
13079}
13080
13081/**
13082 * xmlSAXUserParseFile:
13083 * @sax: a SAX handler
13084 * @user_data: The user data returned on SAX callbacks
13085 * @filename: a file name
13086 *
13087 * parse an XML file and call the given SAX handler routines.
13088 * Automatic support for ZLIB/Compress compressed document is provided
13089 *
13090 * Returns 0 in case of success or a error number otherwise
13091 */
13092int
13093xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13094 const char *filename) {
13095 int ret = 0;
13096 xmlParserCtxtPtr ctxt;
13097
13098 ctxt = xmlCreateFileParserCtxt(filename);
13099 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013100 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013101 xmlFree(ctxt->sax);
13102 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013103 xmlDetectSAX2(ctxt);
13104
Owen Taylor3473f882001-02-23 17:55:21 +000013105 if (user_data != NULL)
13106 ctxt->userData = user_data;
13107
13108 xmlParseDocument(ctxt);
13109
13110 if (ctxt->wellFormed)
13111 ret = 0;
13112 else {
13113 if (ctxt->errNo != 0)
13114 ret = ctxt->errNo;
13115 else
13116 ret = -1;
13117 }
13118 if (sax != NULL)
13119 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013120 if (ctxt->myDoc != NULL) {
13121 xmlFreeDoc(ctxt->myDoc);
13122 ctxt->myDoc = NULL;
13123 }
Owen Taylor3473f882001-02-23 17:55:21 +000013124 xmlFreeParserCtxt(ctxt);
13125
13126 return ret;
13127}
Daniel Veillard81273902003-09-30 00:43:48 +000013128#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013129
13130/************************************************************************
13131 * *
13132 * Front ends when parsing from memory *
13133 * *
13134 ************************************************************************/
13135
13136/**
13137 * xmlCreateMemoryParserCtxt:
13138 * @buffer: a pointer to a char array
13139 * @size: the size of the array
13140 *
13141 * Create a parser context for an XML in-memory document.
13142 *
13143 * Returns the new parser context or NULL
13144 */
13145xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013146xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013147 xmlParserCtxtPtr ctxt;
13148 xmlParserInputPtr input;
13149 xmlParserInputBufferPtr buf;
13150
13151 if (buffer == NULL)
13152 return(NULL);
13153 if (size <= 0)
13154 return(NULL);
13155
13156 ctxt = xmlNewParserCtxt();
13157 if (ctxt == NULL)
13158 return(NULL);
13159
Daniel Veillard53350552003-09-18 13:35:51 +000013160 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013161 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013162 if (buf == NULL) {
13163 xmlFreeParserCtxt(ctxt);
13164 return(NULL);
13165 }
Owen Taylor3473f882001-02-23 17:55:21 +000013166
13167 input = xmlNewInputStream(ctxt);
13168 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013169 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013170 xmlFreeParserCtxt(ctxt);
13171 return(NULL);
13172 }
13173
13174 input->filename = NULL;
13175 input->buf = buf;
13176 input->base = input->buf->buffer->content;
13177 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013178 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013179
13180 inputPush(ctxt, input);
13181 return(ctxt);
13182}
13183
Daniel Veillard81273902003-09-30 00:43:48 +000013184#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013185/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013186 * xmlSAXParseMemoryWithData:
13187 * @sax: the SAX handler block
13188 * @buffer: an pointer to a char array
13189 * @size: the size of the array
13190 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13191 * documents
13192 * @data: the userdata
13193 *
13194 * parse an XML in-memory block and use the given SAX function block
13195 * to handle the parsing callback. If sax is NULL, fallback to the default
13196 * DOM tree building routines.
13197 *
13198 * User data (void *) is stored within the parser context in the
13199 * context's _private member, so it is available nearly everywhere in libxml
13200 *
13201 * Returns the resulting document tree
13202 */
13203
13204xmlDocPtr
13205xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13206 int size, int recovery, void *data) {
13207 xmlDocPtr ret;
13208 xmlParserCtxtPtr ctxt;
13209
13210 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13211 if (ctxt == NULL) return(NULL);
13212 if (sax != NULL) {
13213 if (ctxt->sax != NULL)
13214 xmlFree(ctxt->sax);
13215 ctxt->sax = sax;
13216 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013217 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013218 if (data!=NULL) {
13219 ctxt->_private=data;
13220 }
13221
Daniel Veillardadba5f12003-04-04 16:09:01 +000013222 ctxt->recovery = recovery;
13223
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013224 xmlParseDocument(ctxt);
13225
13226 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13227 else {
13228 ret = NULL;
13229 xmlFreeDoc(ctxt->myDoc);
13230 ctxt->myDoc = NULL;
13231 }
13232 if (sax != NULL)
13233 ctxt->sax = NULL;
13234 xmlFreeParserCtxt(ctxt);
13235
13236 return(ret);
13237}
13238
13239/**
Owen Taylor3473f882001-02-23 17:55:21 +000013240 * xmlSAXParseMemory:
13241 * @sax: the SAX handler block
13242 * @buffer: an pointer to a char array
13243 * @size: the size of the array
13244 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13245 * documents
13246 *
13247 * parse an XML in-memory block and use the given SAX function block
13248 * to handle the parsing callback. If sax is NULL, fallback to the default
13249 * DOM tree building routines.
13250 *
13251 * Returns the resulting document tree
13252 */
13253xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013254xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13255 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013256 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013257}
13258
13259/**
13260 * xmlParseMemory:
13261 * @buffer: an pointer to a char array
13262 * @size: the size of the array
13263 *
13264 * parse an XML in-memory block and build a tree.
13265 *
13266 * Returns the resulting document tree
13267 */
13268
Daniel Veillard50822cb2001-07-26 20:05:51 +000013269xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013270 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13271}
13272
13273/**
13274 * xmlRecoverMemory:
13275 * @buffer: an pointer to a char array
13276 * @size: the size of the array
13277 *
13278 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013279 * In the case the document is not Well Formed, an attempt to
13280 * build a tree is tried anyway
13281 *
13282 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013283 */
13284
Daniel Veillard50822cb2001-07-26 20:05:51 +000013285xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013286 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13287}
13288
13289/**
13290 * xmlSAXUserParseMemory:
13291 * @sax: a SAX handler
13292 * @user_data: The user data returned on SAX callbacks
13293 * @buffer: an in-memory XML document input
13294 * @size: the length of the XML document in bytes
13295 *
13296 * A better SAX parsing routine.
13297 * parse an XML in-memory buffer and call the given SAX handler routines.
13298 *
13299 * Returns 0 in case of success or a error number otherwise
13300 */
13301int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013302 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013303 int ret = 0;
13304 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013305
13306 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13307 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013308 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13309 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013310 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013311 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013312
Daniel Veillard30211a02001-04-26 09:33:18 +000013313 if (user_data != NULL)
13314 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013315
13316 xmlParseDocument(ctxt);
13317
13318 if (ctxt->wellFormed)
13319 ret = 0;
13320 else {
13321 if (ctxt->errNo != 0)
13322 ret = ctxt->errNo;
13323 else
13324 ret = -1;
13325 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013326 if (sax != NULL)
13327 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013328 if (ctxt->myDoc != NULL) {
13329 xmlFreeDoc(ctxt->myDoc);
13330 ctxt->myDoc = NULL;
13331 }
Owen Taylor3473f882001-02-23 17:55:21 +000013332 xmlFreeParserCtxt(ctxt);
13333
13334 return ret;
13335}
Daniel Veillard81273902003-09-30 00:43:48 +000013336#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013337
13338/**
13339 * xmlCreateDocParserCtxt:
13340 * @cur: a pointer to an array of xmlChar
13341 *
13342 * Creates a parser context for an XML in-memory document.
13343 *
13344 * Returns the new parser context or NULL
13345 */
13346xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013347xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013348 int len;
13349
13350 if (cur == NULL)
13351 return(NULL);
13352 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013353 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013354}
13355
Daniel Veillard81273902003-09-30 00:43:48 +000013356#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013357/**
13358 * xmlSAXParseDoc:
13359 * @sax: the SAX handler block
13360 * @cur: a pointer to an array of xmlChar
13361 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13362 * documents
13363 *
13364 * parse an XML in-memory document and build a tree.
13365 * It use the given SAX function block to handle the parsing callback.
13366 * If sax is NULL, fallback to the default DOM tree building routines.
13367 *
13368 * Returns the resulting document tree
13369 */
13370
13371xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013372xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013373 xmlDocPtr ret;
13374 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013375 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013376
Daniel Veillard38936062004-11-04 17:45:11 +000013377 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013378
13379
13380 ctxt = xmlCreateDocParserCtxt(cur);
13381 if (ctxt == NULL) return(NULL);
13382 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013383 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013384 ctxt->sax = sax;
13385 ctxt->userData = NULL;
13386 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013387 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013388
13389 xmlParseDocument(ctxt);
13390 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13391 else {
13392 ret = NULL;
13393 xmlFreeDoc(ctxt->myDoc);
13394 ctxt->myDoc = NULL;
13395 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013396 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013397 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013398 xmlFreeParserCtxt(ctxt);
13399
13400 return(ret);
13401}
13402
13403/**
13404 * xmlParseDoc:
13405 * @cur: a pointer to an array of xmlChar
13406 *
13407 * parse an XML in-memory document and build a tree.
13408 *
13409 * Returns the resulting document tree
13410 */
13411
13412xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013413xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013414 return(xmlSAXParseDoc(NULL, cur, 0));
13415}
Daniel Veillard81273902003-09-30 00:43:48 +000013416#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013417
Daniel Veillard81273902003-09-30 00:43:48 +000013418#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013419/************************************************************************
13420 * *
13421 * Specific function to keep track of entities references *
13422 * and used by the XSLT debugger *
13423 * *
13424 ************************************************************************/
13425
13426static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13427
13428/**
13429 * xmlAddEntityReference:
13430 * @ent : A valid entity
13431 * @firstNode : A valid first node for children of entity
13432 * @lastNode : A valid last node of children entity
13433 *
13434 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13435 */
13436static void
13437xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13438 xmlNodePtr lastNode)
13439{
13440 if (xmlEntityRefFunc != NULL) {
13441 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13442 }
13443}
13444
13445
13446/**
13447 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013448 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013449 *
13450 * Set the function to call call back when a xml reference has been made
13451 */
13452void
13453xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13454{
13455 xmlEntityRefFunc = func;
13456}
Daniel Veillard81273902003-09-30 00:43:48 +000013457#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013458
13459/************************************************************************
13460 * *
13461 * Miscellaneous *
13462 * *
13463 ************************************************************************/
13464
13465#ifdef LIBXML_XPATH_ENABLED
13466#include <libxml/xpath.h>
13467#endif
13468
Daniel Veillardffa3c742005-07-21 13:24:09 +000013469extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013470static int xmlParserInitialized = 0;
13471
13472/**
13473 * xmlInitParser:
13474 *
13475 * Initialization function for the XML parser.
13476 * This is not reentrant. Call once before processing in case of
13477 * use in multithreaded programs.
13478 */
13479
13480void
13481xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013482 if (xmlParserInitialized != 0)
13483 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013484
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013485#ifdef LIBXML_THREAD_ENABLED
13486 __xmlGlobalInitMutexLock();
13487 if (xmlParserInitialized == 0) {
13488#endif
13489 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13490 (xmlGenericError == NULL))
13491 initGenericErrorDefaultFunc(NULL);
13492 xmlInitGlobals();
13493 xmlInitThreads();
13494 xmlInitMemory();
13495 xmlInitCharEncodingHandlers();
13496 xmlDefaultSAXHandlerInit();
13497 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013498#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013499 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013500#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013501#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013502 htmlInitAutoClose();
13503 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013504#endif
13505#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013506 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013507#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013508 xmlParserInitialized = 1;
13509#ifdef LIBXML_THREAD_ENABLED
13510 }
13511 __xmlGlobalInitMutexUnlock();
13512#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013513}
13514
13515/**
13516 * xmlCleanupParser:
13517 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013518 * This function name is somewhat misleading. It does not clean up
13519 * parser state, it cleans up memory allocated by the library itself.
13520 * It is a cleanup function for the XML library. It tries to reclaim all
13521 * related global memory allocated for the library processing.
13522 * It doesn't deallocate any document related memory. One should
13523 * call xmlCleanupParser() only when the process has finished using
13524 * the library and all XML/HTML documents built with it.
13525 * See also xmlInitParser() which has the opposite function of preparing
13526 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013527 */
13528
13529void
13530xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013531 if (!xmlParserInitialized)
13532 return;
13533
Owen Taylor3473f882001-02-23 17:55:21 +000013534 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013535#ifdef LIBXML_CATALOG_ENABLED
13536 xmlCatalogCleanup();
13537#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013538 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013539 xmlCleanupInputCallbacks();
13540#ifdef LIBXML_OUTPUT_ENABLED
13541 xmlCleanupOutputCallbacks();
13542#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013543#ifdef LIBXML_SCHEMAS_ENABLED
13544 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013545 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013546#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013547 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013548 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013549 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013550 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013551 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013552}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013553
13554/************************************************************************
13555 * *
13556 * New set (2.6.0) of simpler and more flexible APIs *
13557 * *
13558 ************************************************************************/
13559
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely inside an unbraced
 * if/else. Invoke it followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13571
13572/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013573 * xmlCtxtReset:
13574 * @ctxt: an XML parser context
13575 *
13576 * Reset a parser context
13577 */
13578void
13579xmlCtxtReset(xmlParserCtxtPtr ctxt)
13580{
13581 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013582 xmlDictPtr dict;
13583
13584 if (ctxt == NULL)
13585 return;
13586
13587 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013588
13589 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13590 xmlFreeInputStream(input);
13591 }
13592 ctxt->inputNr = 0;
13593 ctxt->input = NULL;
13594
13595 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013596 if (ctxt->spaceTab != NULL) {
13597 ctxt->spaceTab[0] = -1;
13598 ctxt->space = &ctxt->spaceTab[0];
13599 } else {
13600 ctxt->space = NULL;
13601 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013602
13603
13604 ctxt->nodeNr = 0;
13605 ctxt->node = NULL;
13606
13607 ctxt->nameNr = 0;
13608 ctxt->name = NULL;
13609
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013610 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013611 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013612 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013613 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013614 DICT_FREE(ctxt->directory);
13615 ctxt->directory = NULL;
13616 DICT_FREE(ctxt->extSubURI);
13617 ctxt->extSubURI = NULL;
13618 DICT_FREE(ctxt->extSubSystem);
13619 ctxt->extSubSystem = NULL;
13620 if (ctxt->myDoc != NULL)
13621 xmlFreeDoc(ctxt->myDoc);
13622 ctxt->myDoc = NULL;
13623
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013624 ctxt->standalone = -1;
13625 ctxt->hasExternalSubset = 0;
13626 ctxt->hasPErefs = 0;
13627 ctxt->html = 0;
13628 ctxt->external = 0;
13629 ctxt->instate = XML_PARSER_START;
13630 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013631
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013632 ctxt->wellFormed = 1;
13633 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013634 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013635 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013636#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013637 ctxt->vctxt.userData = ctxt;
13638 ctxt->vctxt.error = xmlParserValidityError;
13639 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013640#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013641 ctxt->record_info = 0;
13642 ctxt->nbChars = 0;
13643 ctxt->checkIndex = 0;
13644 ctxt->inSubset = 0;
13645 ctxt->errNo = XML_ERR_OK;
13646 ctxt->depth = 0;
13647 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13648 ctxt->catalogs = NULL;
13649 xmlInitNodeInfoSeq(&ctxt->node_seq);
13650
13651 if (ctxt->attsDefault != NULL) {
13652 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13653 ctxt->attsDefault = NULL;
13654 }
13655 if (ctxt->attsSpecial != NULL) {
13656 xmlHashFree(ctxt->attsSpecial, NULL);
13657 ctxt->attsSpecial = NULL;
13658 }
13659
Daniel Veillard4432df22003-09-28 18:58:27 +000013660#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013661 if (ctxt->catalogs != NULL)
13662 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013663#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013664 if (ctxt->lastError.code != XML_ERR_OK)
13665 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013666}
13667
13668/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013669 * xmlCtxtResetPush:
13670 * @ctxt: an XML parser context
13671 * @chunk: a pointer to an array of chars
13672 * @size: number of chars in the array
13673 * @filename: an optional file name or URI
13674 * @encoding: the document encoding, or NULL
13675 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013676 * Reset a push parser context
13677 *
13678 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013679 */
13680int
13681xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13682 int size, const char *filename, const char *encoding)
13683{
13684 xmlParserInputPtr inputStream;
13685 xmlParserInputBufferPtr buf;
13686 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13687
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013688 if (ctxt == NULL)
13689 return(1);
13690
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013691 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13692 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13693
13694 buf = xmlAllocParserInputBuffer(enc);
13695 if (buf == NULL)
13696 return(1);
13697
13698 if (ctxt == NULL) {
13699 xmlFreeParserInputBuffer(buf);
13700 return(1);
13701 }
13702
13703 xmlCtxtReset(ctxt);
13704
13705 if (ctxt->pushTab == NULL) {
13706 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13707 sizeof(xmlChar *));
13708 if (ctxt->pushTab == NULL) {
13709 xmlErrMemory(ctxt, NULL);
13710 xmlFreeParserInputBuffer(buf);
13711 return(1);
13712 }
13713 }
13714
13715 if (filename == NULL) {
13716 ctxt->directory = NULL;
13717 } else {
13718 ctxt->directory = xmlParserGetDirectory(filename);
13719 }
13720
13721 inputStream = xmlNewInputStream(ctxt);
13722 if (inputStream == NULL) {
13723 xmlFreeParserInputBuffer(buf);
13724 return(1);
13725 }
13726
13727 if (filename == NULL)
13728 inputStream->filename = NULL;
13729 else
13730 inputStream->filename = (char *)
13731 xmlCanonicPath((const xmlChar *) filename);
13732 inputStream->buf = buf;
13733 inputStream->base = inputStream->buf->buffer->content;
13734 inputStream->cur = inputStream->buf->buffer->content;
13735 inputStream->end =
13736 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13737
13738 inputPush(ctxt, inputStream);
13739
13740 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13741 (ctxt->input->buf != NULL)) {
13742 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13743 int cur = ctxt->input->cur - ctxt->input->base;
13744
13745 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13746
13747 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13748 ctxt->input->cur = ctxt->input->base + cur;
13749 ctxt->input->end =
13750 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13751 use];
13752#ifdef DEBUG_PUSH
13753 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13754#endif
13755 }
13756
13757 if (encoding != NULL) {
13758 xmlCharEncodingHandlerPtr hdlr;
13759
Daniel Veillard37334572008-07-31 08:20:02 +000013760 if (ctxt->encoding != NULL)
13761 xmlFree((xmlChar *) ctxt->encoding);
13762 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13763
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013764 hdlr = xmlFindCharEncodingHandler(encoding);
13765 if (hdlr != NULL) {
13766 xmlSwitchToEncoding(ctxt, hdlr);
13767 } else {
13768 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13769 "Unsupported encoding %s\n", BAD_CAST encoding);
13770 }
13771 } else if (enc != XML_CHAR_ENCODING_NONE) {
13772 xmlSwitchEncoding(ctxt, enc);
13773 }
13774
13775 return(0);
13776}
13777
Daniel Veillard37334572008-07-31 08:20:02 +000013778
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013779/**
Daniel Veillard37334572008-07-31 08:20:02 +000013780 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013781 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013782 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000013783 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013784 *
13785 * Applies the options to the parser context
13786 *
13787 * Returns 0 in case of success, the set of unknown or unimplemented options
13788 * in case of error.
13789 */
Daniel Veillard37334572008-07-31 08:20:02 +000013790static int
13791xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013792{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013793 if (ctxt == NULL)
13794 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000013795 if (encoding != NULL) {
13796 if (ctxt->encoding != NULL)
13797 xmlFree((xmlChar *) ctxt->encoding);
13798 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13799 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013800 if (options & XML_PARSE_RECOVER) {
13801 ctxt->recovery = 1;
13802 options -= XML_PARSE_RECOVER;
13803 } else
13804 ctxt->recovery = 0;
13805 if (options & XML_PARSE_DTDLOAD) {
13806 ctxt->loadsubset = XML_DETECT_IDS;
13807 options -= XML_PARSE_DTDLOAD;
13808 } else
13809 ctxt->loadsubset = 0;
13810 if (options & XML_PARSE_DTDATTR) {
13811 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13812 options -= XML_PARSE_DTDATTR;
13813 }
13814 if (options & XML_PARSE_NOENT) {
13815 ctxt->replaceEntities = 1;
13816 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13817 options -= XML_PARSE_NOENT;
13818 } else
13819 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013820 if (options & XML_PARSE_PEDANTIC) {
13821 ctxt->pedantic = 1;
13822 options -= XML_PARSE_PEDANTIC;
13823 } else
13824 ctxt->pedantic = 0;
13825 if (options & XML_PARSE_NOBLANKS) {
13826 ctxt->keepBlanks = 0;
13827 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13828 options -= XML_PARSE_NOBLANKS;
13829 } else
13830 ctxt->keepBlanks = 1;
13831 if (options & XML_PARSE_DTDVALID) {
13832 ctxt->validate = 1;
13833 if (options & XML_PARSE_NOWARNING)
13834 ctxt->vctxt.warning = NULL;
13835 if (options & XML_PARSE_NOERROR)
13836 ctxt->vctxt.error = NULL;
13837 options -= XML_PARSE_DTDVALID;
13838 } else
13839 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013840 if (options & XML_PARSE_NOWARNING) {
13841 ctxt->sax->warning = NULL;
13842 options -= XML_PARSE_NOWARNING;
13843 }
13844 if (options & XML_PARSE_NOERROR) {
13845 ctxt->sax->error = NULL;
13846 ctxt->sax->fatalError = NULL;
13847 options -= XML_PARSE_NOERROR;
13848 }
Daniel Veillard81273902003-09-30 00:43:48 +000013849#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013850 if (options & XML_PARSE_SAX1) {
13851 ctxt->sax->startElement = xmlSAX2StartElement;
13852 ctxt->sax->endElement = xmlSAX2EndElement;
13853 ctxt->sax->startElementNs = NULL;
13854 ctxt->sax->endElementNs = NULL;
13855 ctxt->sax->initialized = 1;
13856 options -= XML_PARSE_SAX1;
13857 }
Daniel Veillard81273902003-09-30 00:43:48 +000013858#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013859 if (options & XML_PARSE_NODICT) {
13860 ctxt->dictNames = 0;
13861 options -= XML_PARSE_NODICT;
13862 } else {
13863 ctxt->dictNames = 1;
13864 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013865 if (options & XML_PARSE_NOCDATA) {
13866 ctxt->sax->cdataBlock = NULL;
13867 options -= XML_PARSE_NOCDATA;
13868 }
13869 if (options & XML_PARSE_NSCLEAN) {
13870 ctxt->options |= XML_PARSE_NSCLEAN;
13871 options -= XML_PARSE_NSCLEAN;
13872 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013873 if (options & XML_PARSE_NONET) {
13874 ctxt->options |= XML_PARSE_NONET;
13875 options -= XML_PARSE_NONET;
13876 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013877 if (options & XML_PARSE_COMPACT) {
13878 ctxt->options |= XML_PARSE_COMPACT;
13879 options -= XML_PARSE_COMPACT;
13880 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000013881 if (options & XML_PARSE_OLD10) {
13882 ctxt->options |= XML_PARSE_OLD10;
13883 options -= XML_PARSE_OLD10;
13884 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013885 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013886 return (options);
13887}
13888
13889/**
Daniel Veillard37334572008-07-31 08:20:02 +000013890 * xmlCtxtUseOptions:
13891 * @ctxt: an XML parser context
13892 * @options: a combination of xmlParserOption
13893 *
13894 * Applies the options to the parser context
13895 *
13896 * Returns 0 in case of success, the set of unknown or unimplemented options
13897 * in case of error.
13898 */
13899int
13900xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13901{
13902 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
13903}
13904
13905/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013906 * xmlDoRead:
13907 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013908 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013909 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013910 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013911 * @reuse: keep the context for reuse
13912 *
13913 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000013914 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013915 * Returns the resulting document tree or NULL
13916 */
13917static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013918xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13919 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013920{
13921 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000013922
13923 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013924 if (encoding != NULL) {
13925 xmlCharEncodingHandlerPtr hdlr;
13926
13927 hdlr = xmlFindCharEncodingHandler(encoding);
13928 if (hdlr != NULL)
13929 xmlSwitchToEncoding(ctxt, hdlr);
13930 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013931 if ((URL != NULL) && (ctxt->input != NULL) &&
13932 (ctxt->input->filename == NULL))
13933 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013934 xmlParseDocument(ctxt);
13935 if ((ctxt->wellFormed) || ctxt->recovery)
13936 ret = ctxt->myDoc;
13937 else {
13938 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013939 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013940 xmlFreeDoc(ctxt->myDoc);
13941 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013942 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013943 ctxt->myDoc = NULL;
13944 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013945 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013946 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013947
13948 return (ret);
13949}
13950
13951/**
13952 * xmlReadDoc:
13953 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013954 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013955 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013956 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013957 *
13958 * parse an XML in-memory document and build a tree.
13959 *
13960 * Returns the resulting document tree
13961 */
13962xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013963xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013964{
13965 xmlParserCtxtPtr ctxt;
13966
13967 if (cur == NULL)
13968 return (NULL);
13969
13970 ctxt = xmlCreateDocParserCtxt(cur);
13971 if (ctxt == NULL)
13972 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013973 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013974}
13975
13976/**
13977 * xmlReadFile:
13978 * @filename: a file or URL
13979 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013980 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013981 *
13982 * parse an XML file from the filesystem or the network.
13983 *
13984 * Returns the resulting document tree
13985 */
13986xmlDocPtr
13987xmlReadFile(const char *filename, const char *encoding, int options)
13988{
13989 xmlParserCtxtPtr ctxt;
13990
Daniel Veillard61b93382003-11-03 14:28:31 +000013991 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013992 if (ctxt == NULL)
13993 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013994 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013995}
13996
13997/**
13998 * xmlReadMemory:
13999 * @buffer: a pointer to a char array
14000 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014001 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014002 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014003 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014004 *
14005 * parse an XML in-memory document and build a tree.
14006 *
14007 * Returns the resulting document tree
14008 */
14009xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014010xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014011{
14012 xmlParserCtxtPtr ctxt;
14013
14014 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14015 if (ctxt == NULL)
14016 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014017 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014018}
14019
14020/**
14021 * xmlReadFd:
14022 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014023 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014024 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014025 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014026 *
14027 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014028 * NOTE that the file descriptor will not be closed when the
14029 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014030 *
14031 * Returns the resulting document tree
14032 */
14033xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014034xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014035{
14036 xmlParserCtxtPtr ctxt;
14037 xmlParserInputBufferPtr input;
14038 xmlParserInputPtr stream;
14039
14040 if (fd < 0)
14041 return (NULL);
14042
14043 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14044 if (input == NULL)
14045 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014046 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014047 ctxt = xmlNewParserCtxt();
14048 if (ctxt == NULL) {
14049 xmlFreeParserInputBuffer(input);
14050 return (NULL);
14051 }
14052 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14053 if (stream == NULL) {
14054 xmlFreeParserInputBuffer(input);
14055 xmlFreeParserCtxt(ctxt);
14056 return (NULL);
14057 }
14058 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014059 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014060}
14061
14062/**
14063 * xmlReadIO:
14064 * @ioread: an I/O read function
14065 * @ioclose: an I/O close function
14066 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014067 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014068 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014069 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014070 *
14071 * parse an XML document from I/O functions and source and build a tree.
14072 *
14073 * Returns the resulting document tree
14074 */
14075xmlDocPtr
14076xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014077 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014078{
14079 xmlParserCtxtPtr ctxt;
14080 xmlParserInputBufferPtr input;
14081 xmlParserInputPtr stream;
14082
14083 if (ioread == NULL)
14084 return (NULL);
14085
14086 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14087 XML_CHAR_ENCODING_NONE);
14088 if (input == NULL)
14089 return (NULL);
14090 ctxt = xmlNewParserCtxt();
14091 if (ctxt == NULL) {
14092 xmlFreeParserInputBuffer(input);
14093 return (NULL);
14094 }
14095 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14096 if (stream == NULL) {
14097 xmlFreeParserInputBuffer(input);
14098 xmlFreeParserCtxt(ctxt);
14099 return (NULL);
14100 }
14101 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014102 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014103}
14104
14105/**
14106 * xmlCtxtReadDoc:
14107 * @ctxt: an XML parser context
14108 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014109 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014110 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014111 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014112 *
14113 * parse an XML in-memory document and build a tree.
14114 * This reuses the existing @ctxt parser context
14115 *
14116 * Returns the resulting document tree
14117 */
14118xmlDocPtr
14119xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014120 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014121{
14122 xmlParserInputPtr stream;
14123
14124 if (cur == NULL)
14125 return (NULL);
14126 if (ctxt == NULL)
14127 return (NULL);
14128
14129 xmlCtxtReset(ctxt);
14130
14131 stream = xmlNewStringInputStream(ctxt, cur);
14132 if (stream == NULL) {
14133 return (NULL);
14134 }
14135 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014136 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014137}
14138
14139/**
14140 * xmlCtxtReadFile:
14141 * @ctxt: an XML parser context
14142 * @filename: a file or URL
14143 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014144 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014145 *
14146 * parse an XML file from the filesystem or the network.
14147 * This reuses the existing @ctxt parser context
14148 *
14149 * Returns the resulting document tree
14150 */
14151xmlDocPtr
14152xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14153 const char *encoding, int options)
14154{
14155 xmlParserInputPtr stream;
14156
14157 if (filename == NULL)
14158 return (NULL);
14159 if (ctxt == NULL)
14160 return (NULL);
14161
14162 xmlCtxtReset(ctxt);
14163
Daniel Veillard29614c72004-11-26 10:47:26 +000014164 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014165 if (stream == NULL) {
14166 return (NULL);
14167 }
14168 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014169 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014170}
14171
14172/**
14173 * xmlCtxtReadMemory:
14174 * @ctxt: an XML parser context
14175 * @buffer: a pointer to a char array
14176 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014177 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014178 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014179 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014180 *
14181 * parse an XML in-memory document and build a tree.
14182 * This reuses the existing @ctxt parser context
14183 *
14184 * Returns the resulting document tree
14185 */
14186xmlDocPtr
14187xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014188 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014189{
14190 xmlParserInputBufferPtr input;
14191 xmlParserInputPtr stream;
14192
14193 if (ctxt == NULL)
14194 return (NULL);
14195 if (buffer == NULL)
14196 return (NULL);
14197
14198 xmlCtxtReset(ctxt);
14199
14200 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14201 if (input == NULL) {
14202 return(NULL);
14203 }
14204
14205 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14206 if (stream == NULL) {
14207 xmlFreeParserInputBuffer(input);
14208 return(NULL);
14209 }
14210
14211 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014212 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014213}
14214
14215/**
14216 * xmlCtxtReadFd:
14217 * @ctxt: an XML parser context
14218 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014219 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014220 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014221 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014222 *
14223 * parse an XML from a file descriptor and build a tree.
14224 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014225 * NOTE that the file descriptor will not be closed when the
14226 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014227 *
14228 * Returns the resulting document tree
14229 */
14230xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014231xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14232 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014233{
14234 xmlParserInputBufferPtr input;
14235 xmlParserInputPtr stream;
14236
14237 if (fd < 0)
14238 return (NULL);
14239 if (ctxt == NULL)
14240 return (NULL);
14241
14242 xmlCtxtReset(ctxt);
14243
14244
14245 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14246 if (input == NULL)
14247 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014248 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014249 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14250 if (stream == NULL) {
14251 xmlFreeParserInputBuffer(input);
14252 return (NULL);
14253 }
14254 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014255 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014256}
14257
14258/**
14259 * xmlCtxtReadIO:
14260 * @ctxt: an XML parser context
14261 * @ioread: an I/O read function
14262 * @ioclose: an I/O close function
14263 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014264 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014265 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014266 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267 *
14268 * parse an XML document from I/O functions and source and build a tree.
14269 * This reuses the existing @ctxt parser context
14270 *
14271 * Returns the resulting document tree
14272 */
14273xmlDocPtr
14274xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14275 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014276 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014277 const char *encoding, int options)
14278{
14279 xmlParserInputBufferPtr input;
14280 xmlParserInputPtr stream;
14281
14282 if (ioread == NULL)
14283 return (NULL);
14284 if (ctxt == NULL)
14285 return (NULL);
14286
14287 xmlCtxtReset(ctxt);
14288
14289 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14290 XML_CHAR_ENCODING_NONE);
14291 if (input == NULL)
14292 return (NULL);
14293 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14294 if (stream == NULL) {
14295 xmlFreeParserInputBuffer(input);
14296 return (NULL);
14297 }
14298 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014299 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014300}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014301
14302#define bottom_parser
14303#include "elfgcchack.h"