blob: aeb8ab3b4e09e964cf20760b95e5647087d4065d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents we accept to
 * process. It is a safety boundary, not an intrinsic limitation of the
 * parser.
 */
unsigned int xmlParserMaxDepth = 1024;

/* NOTE(review): presumably selects the SAX2 code paths - confirm at use sites */
#define SAX2 1

/* Sizes, in bytes, of the two parser work buffers */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for documents built in SAX compatibility mode */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
 * Returns a non-zero value if the feature is available, or zero (0) if
 * it is not available or an unknown feature is requested.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
/*
 * Tell whether @c is one of [a-z] | [A-Z].
 */
static int
xmlLangIsAsciiLetter(int c)
{
    return (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The '+' of IanaCode and UserCode is enforced: "i-" or "x-" not
 * followed by a letter is rejected.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: at least one letter after the dash
         */
        cur += 2;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return (0);
        while (xmlLangIsAsciiLetter(cur[0]))    /* non input consuming */
            cur++;
    } else if (xmlLangIsAsciiLetter(cur[0])) {
        /*
         * ISO639: exactly two letters
         */
        if (!xmlLangIsAsciiLetter(cur[1]))
            return (0);
        cur += 2;
    } else {
        return (0);
    }

    /*
     * ('-' Subcode)* up to the end of the string
     */
    while (cur[0] != 0) {       /* non input consuming */
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsAsciiLetter(cur[0]))
            return (0);
        while (xmlLangIsAsciiLetter(cur[0]))    /* non input consuming */
            cur++;
    }
    return (1);
}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
1425/*
1426 * Macros for accessing the content. Those should be used only by the parser,
1427 * and not exported.
1428 *
1429 * Dirty macros, i.e. one often need to make assumption on the context to
1430 * use them
1431 *
1432 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1433 * To be used with extreme caution since operations consuming
1434 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
1443 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001444 * strings without newlines within the parser.
1445 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1446 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001447 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1448 *
1449 * NEXT Skip to the next character, this does the proper decoding
1450 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001451 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001452 * CUR_CHAR(l) returns the current unicode character (int), set l
1453 * to the number of xmlChars used for the encoding [0-5].
1454 * CUR_SCHAR same but operate on a string instead of the context
1455 * COPY_BUF copy the current unicode char to the target buffer, increment
1456 * the index
1457 * GROW, SHRINK handling of input buffers
1458 */
1459
/*
 * Byte-level accessors on the current input of the parser context.
 * They all expect a variable named ctxt to be in scope where they are used.
 *
 *   RAW       the byte at the current input position
 *   CUR       expands to exactly the same expression as RAW
 *   NXT(val)  the byte val positions after the current one
 *   CUR_PTR   the current input pointer itself
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): non-zero when the n first bytes found at s are
 * equal to c1 ... cn, compared as unsigned bytes. The comparison stops at
 * the first mismatch, so bytes located after it are never read.
 *
 * Every argument is parenthesized so that an expression can safely be
 * passed (for example a conditional expression for one of the bytes), and
 * the cast keeps the const qualifier since the macros only read from s.
 * s may be evaluated several times, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1482
/*
 * SKIP(val): move the current input val xmlChars forward, adding val to
 * both the character counter and the column. The line number is not
 * updated. Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the position holds a 0 and the
 * input cannot be grown the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1490
/*
 * SKIPL(val): move the current input val xmlChars forward one at a time,
 * so that line and column stay accurate when newlines are crossed: a
 * newline bumps the line and resets the column to 1, any other xmlChar
 * bumps the column. The character counter grows by one per xmlChar.
 * Afterwards a '%' at the new position is handed to
 * xmlParserHandlePEReference(), and when the position holds a 0 and the
 * input cannot be grown the input is popped.
 *
 * val is parenthesized in the loop test so that any expression (a
 * conditional expression for instance) yields the intended count; it is
 * evaluated on every iteration and must be free of side effects.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1505
Daniel Veillarda880b122003-04-21 21:36:41 +00001506#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001507 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1508 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001509 xmlSHRINK (ctxt);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
Daniel Veillarda880b122003-04-21 21:36:41 +00001518#define GROW if ((ctxt->progressive == 0) && \
1519 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001520 xmlGROW (ctxt);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
/* SKIP_BLANKS: run xmlSkipBlankChars() on the context, its value is the
 * number of blanks skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and the
 * character counter, and ask for more input when the new position
 * holds a 0. The line number is not updated.
 * The expansion is a plain brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
    ctxt->input->cur++; \
    ctxt->input->col++; \
    ctxt->nbChars++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A newline bumps the line and resets the column to 1, anything else
 * bumps the column by one. A '%' found at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)
1548
/*
 * CUR_CHAR(l): value of xmlCurrentChar() on the context, l receives the
 * length reported by that function.
 * CUR_SCHAR(s, l): same through xmlStringCurrentChar() on the string s.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append the character v to the buffer b at index i
 * and advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is written by xmlCopyCharMultiByte() and i grows by the
 * value that function returns.
 *
 * All the arguments are parenthesized so that expressions can be passed,
 * and the body is wrapped in do/while(0) so that the macro behaves as one
 * statement, including between an if and its else. It must be followed by
 * a semicolon, as was already the case. i must be a modifiable lvalue.
 */
#define COPY_BUF(l,b,i,v) do { \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v)); \
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
/*
 * growBuffer(buffer): double the capacity of a dynamic xmlChar buffer.
 *
 * The macro relies on naming conventions at the point of use: the
 * capacity lives in a variable called <buffer>_size and a label called
 * mem_error must exist, it is jumped to when the reallocation fails.
 * In that case buffer still points to the old block while
 * <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) { \
    xmlChar *tmp; \
    buffer##_size = buffer##_size * 2; \
    tmp = (xmlChar *) xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (tmp != NULL) \
        buffer = tmp; \
    else \
        goto mem_error; \
}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002180 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2181 growBuffer(buffer);
2182 }
Owen Taylor3473f882001-02-23 17:55:21 +00002183 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2184 if (xmlParserDebugEntities)
2185 xmlGenericError(xmlGenericErrorContext,
2186 "String decoding Entity Reference: %.30s\n",
2187 str);
2188 ent = xmlParseStringEntityRef(ctxt, &str);
2189 if ((ent != NULL) &&
2190 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2191 if (ent->content != NULL) {
2192 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002193 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2194 growBuffer(buffer);
2195 }
Owen Taylor3473f882001-02-23 17:55:21 +00002196 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002197 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2198 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002199 }
2200 } else if ((ent != NULL) && (ent->content != NULL)) {
2201 xmlChar *rep;
2202
2203 ctxt->depth++;
2204 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2205 0, 0, 0);
2206 ctxt->depth--;
2207 if (rep != NULL) {
2208 current = rep;
2209 while (*current != 0) { /* non input consuming loop */
2210 buffer[nbchars++] = *current++;
2211 if (nbchars >
2212 buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 }
2216 xmlFree(rep);
2217 }
2218 } else if (ent != NULL) {
2219 int i = xmlStrlen(ent->name);
2220 const xmlChar *cur = ent->name;
2221
2222 buffer[nbchars++] = '&';
2223 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2224 growBuffer(buffer);
2225 }
2226 for (;i > 0;i--)
2227 buffer[nbchars++] = *cur++;
2228 buffer[nbchars++] = ';';
2229 }
2230 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
2233 "String decoding PE Reference: %.30s\n", str);
2234 ent = xmlParseStringPEReference(ctxt, &str);
2235 if (ent != NULL) {
2236 xmlChar *rep;
2237
2238 ctxt->depth++;
2239 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2240 0, 0, 0);
2241 ctxt->depth--;
2242 if (rep != NULL) {
2243 current = rep;
2244 while (*current != 0) { /* non input consuming loop */
2245 buffer[nbchars++] = *current++;
2246 if (nbchars >
2247 buffer_size - XML_PARSER_BUFFER_SIZE) {
2248 growBuffer(buffer);
2249 }
2250 }
2251 xmlFree(rep);
2252 }
2253 }
2254 } else {
2255 COPY_BUF(l,buffer,nbchars,c);
2256 str += l;
2257 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2258 growBuffer(buffer);
2259 }
2260 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002261 if (str < last)
2262 c = CUR_SCHAR(str, l);
2263 else
2264 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266 buffer[nbchars++] = 0;
2267 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002268
2269mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272}
2273
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274/**
2275 * xmlStringDecodeEntities:
2276 * @ctxt: the parser context
2277 * @str: the input string
2278 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2279 * @end: an end marker xmlChar, 0 if none
2280 * @end2: an end marker xmlChar, 0 if none
2281 * @end3: an end marker xmlChar, 0 if none
2282 *
2283 * Takes a entity string content and process to do the adequate substitutions.
2284 *
2285 * [67] Reference ::= EntityRef | CharRef
2286 *
2287 * [69] PEReference ::= '%' Name ';'
2288 *
2289 * Returns A newly allocated string with the substitution done. The caller
2290 * must deallocate it !
2291 */
2292xmlChar *
2293xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2294 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002295 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002296 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2297 end, end2, end3));
2298}
Owen Taylor3473f882001-02-23 17:55:21 +00002299
2300/************************************************************************
2301 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002302 * Commodity functions, cleanup needed ? *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * areBlanks:
2308 * @ctxt: an XML parser context
2309 * @str: a xmlChar *
2310 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002311 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002312 *
2313 * Is this a sequence of blank chars that one can ignore ?
2314 *
2315 * Returns 1 if ignorable 0 otherwise.
2316 */
2317
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002318static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 int i, ret;
2321 xmlNodePtr lastChild;
2322
Daniel Veillard05c13a22001-09-09 08:38:09 +00002323 /*
2324 * Don't spend time trying to differentiate them, the same callback is
2325 * used !
2326 */
2327 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002328 return(0);
2329
Owen Taylor3473f882001-02-23 17:55:21 +00002330 /*
2331 * Check for xml:space value.
2332 */
2333 if (*(ctxt->space) == 1)
2334 return(0);
2335
2336 /*
2337 * Check that the string is made of blanks
2338 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002339 if (blank_chars == 0) {
2340 for (i = 0;i < len;i++)
2341 if (!(IS_BLANK_CH(str[i]))) return(0);
2342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343
2344 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002345 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002346 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002347 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (ctxt->myDoc != NULL) {
2349 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2350 if (ret == 0) return(1);
2351 if (ret == 1) return(0);
2352 }
2353
2354 /*
2355 * Otherwise, heuristic :-\
2356 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002357 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if ((ctxt->node->children == NULL) &&
2359 (RAW == '<') && (NXT(1) == '/')) return(0);
2360
2361 lastChild = xmlGetLastChild(ctxt->node);
2362 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002363 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2364 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if (xmlNodeIsText(lastChild))
2366 return(0);
2367 else if ((ctxt->node->children != NULL) &&
2368 (xmlNodeIsText(ctxt->node->children)))
2369 return(0);
2370 return(1);
2371}
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373/************************************************************************
2374 * *
2375 * Extra stuff for namespace support *
2376 * Relates to http://www.w3.org/TR/WD-xml-names *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * xmlSplitQName:
2382 * @ctxt: an XML parser context
2383 * @name: an XML parser context
2384 * @prefix: a xmlChar **
2385 *
2386 * parse an UTF8 encoded XML qualified name string
2387 *
2388 * [NS 5] QName ::= (Prefix ':')? LocalPart
2389 *
2390 * [NS 6] Prefix ::= NCName
2391 *
2392 * [NS 7] LocalPart ::= NCName
2393 *
2394 * Returns the local part, and prefix is updated
2395 * to get the Prefix if any.
2396 */
2397
2398xmlChar *
2399xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2400 xmlChar buf[XML_MAX_NAMELEN + 5];
2401 xmlChar *buffer = NULL;
2402 int len = 0;
2403 int max = XML_MAX_NAMELEN;
2404 xmlChar *ret = NULL;
2405 const xmlChar *cur = name;
2406 int c;
2407
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002408 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 *prefix = NULL;
2410
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002411 if (cur == NULL) return(NULL);
2412
Owen Taylor3473f882001-02-23 17:55:21 +00002413#ifndef XML_XML_NAMESPACE
2414 /* xml: prefix is not really a namespace */
2415 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2416 (cur[2] == 'l') && (cur[3] == ':'))
2417 return(xmlStrdup(name));
2418#endif
2419
Daniel Veillard597bc482003-07-24 16:08:28 +00002420 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if (cur[0] == ':')
2422 return(xmlStrdup(name));
2423
2424 c = *cur++;
2425 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2426 buf[len++] = c;
2427 c = *cur++;
2428 }
2429 if (len >= max) {
2430 /*
2431 * Okay someone managed to make a huge name, so he's ready to pay
2432 * for the processing speed.
2433 */
2434 max = len * 2;
2435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441 memcpy(buffer, buf, len);
2442 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2443 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002444 xmlChar *tmp;
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002447 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002448 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002449 if (tmp == NULL) {
2450 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(NULL);
2453 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002454 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 }
2456 buffer[len++] = c;
2457 c = *cur++;
2458 }
2459 buffer[len] = 0;
2460 }
2461
Daniel Veillard597bc482003-07-24 16:08:28 +00002462 /* nasty but well=formed
2463 if ((c == ':') && (*cur == 0)) {
2464 return(xmlStrdup(name));
2465 } */
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buffer == NULL)
2468 ret = xmlStrndup(buf, len);
2469 else {
2470 ret = buffer;
2471 buffer = NULL;
2472 max = XML_MAX_NAMELEN;
2473 }
2474
2475
2476 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002477 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002480 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002481 }
Owen Taylor3473f882001-02-23 17:55:21 +00002482 len = 0;
2483
Daniel Veillardbb284f42002-10-16 18:02:47 +00002484 /*
2485 * Check that the first character is proper to start
2486 * a new name
2487 */
2488 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2489 ((c >= 0x41) && (c <= 0x5A)) ||
2490 (c == '_') || (c == ':'))) {
2491 int l;
2492 int first = CUR_SCHAR(cur, l);
2493
2494 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002495 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002496 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002497 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002498 }
2499 }
2500 cur++;
2501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2503 buf[len++] = c;
2504 c = *cur++;
2505 }
2506 if (len >= max) {
2507 /*
2508 * Okay someone managed to make a huge name, so he's ready to pay
2509 * for the processing speed.
2510 */
2511 max = len * 2;
2512
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(NULL);
2517 }
2518 memcpy(buffer, buf, len);
2519 while (c != 0) { /* tested bigname2.xml */
2520 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002521 xmlChar *tmp;
2522
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002571 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002625 register const xmlChar *cmp = other;
2626 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002635 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002636 }
William M. Brack76e95df2003-10-18 16:20:14 +00002637 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638 /* success */
2639 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 /* failure (or end of input buffer), check with full function */
2643 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644 /* strings coming from the dictionnary direct compare possible */
2645 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002646 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647 }
2648 return ret;
2649}
2650
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002651static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002653 int len = 0, l;
2654 int c;
2655 int count = 0;
2656
2657 /*
2658 * Handler for more complex cases
2659 */
2660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 c = CUR_CHAR(l);
2662 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2663 (!IS_LETTER(c) && (c != '_') &&
2664 (c != ':'))) {
2665 return(NULL);
2666 }
2667
2668 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002669 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002670 (c == '.') || (c == '-') ||
2671 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002672 (IS_COMBINING(c)) ||
2673 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (count++ > 100) {
2675 count = 0;
2676 GROW;
2677 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 NEXTL(l);
2680 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard96688262005-08-23 18:14:12 +00002682 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2683 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002685}
2686
2687/**
2688 * xmlParseStringName:
2689 * @ctxt: an XML parser context
2690 * @str: a pointer to the string pointer (IN/OUT)
2691 *
2692 * parse an XML name.
2693 *
2694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2695 * CombiningChar | Extender
2696 *
2697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2698 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002699 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002702 * is updated to the current location in the string.
2703 */
2704
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002705static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002706xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2707 xmlChar buf[XML_MAX_NAMELEN + 5];
2708 const xmlChar *cur = *str;
2709 int len = 0, l;
2710 int c;
2711
2712 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002713 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002714 (c != ':')) {
2715 return(NULL);
2716 }
2717
William M. Brack871611b2003-10-18 04:53:14 +00002718 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002719 (c == '.') || (c == '-') ||
2720 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002721 (IS_COMBINING(c)) ||
2722 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723 COPY_BUF(l,buf,len,c);
2724 cur += l;
2725 c = CUR_SCHAR(cur, l);
2726 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2727 /*
2728 * Okay someone managed to make a huge name, so he's ready to pay
2729 * for the processing speed.
2730 */
2731 xmlChar *buffer;
2732 int max = len * 2;
2733
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002734 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002735 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(NULL);
2738 }
2739 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002741 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002742 (c == '.') || (c == '-') ||
2743 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002744 (IS_COMBINING(c)) ||
2745 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002749 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002750 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002753 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
2755 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 COPY_BUF(l,buffer,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 }
2762 buffer[len] = 0;
2763 *str = cur;
2764 return(buffer);
2765 }
2766 }
2767 *str = cur;
2768 return(xmlStrndup(buf, len));
2769}
2770
2771/**
2772 * xmlParseNmtoken:
2773 * @ctxt: an XML parser context
2774 *
2775 * parse an XML Nmtoken.
2776 *
2777 * [7] Nmtoken ::= (NameChar)+
2778 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002779 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002780 *
2781 * Returns the Nmtoken parsed or NULL
2782 */
2783
2784xmlChar *
2785xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2786 xmlChar buf[XML_MAX_NAMELEN + 5];
2787 int len = 0, l;
2788 int c;
2789 int count = 0;
2790
2791 GROW;
2792 c = CUR_CHAR(l);
2793
William M. Brack871611b2003-10-18 04:53:14 +00002794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 (c == '.') || (c == '-') ||
2796 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002797 (IS_COMBINING(c)) ||
2798 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 if (count++ > 100) {
2800 count = 0;
2801 GROW;
2802 }
2803 COPY_BUF(l,buf,len,c);
2804 NEXTL(l);
2805 c = CUR_CHAR(l);
2806 if (len >= XML_MAX_NAMELEN) {
2807 /*
2808 * Okay someone managed to make a huge token, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlChar *tmp;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002838 return(NULL);
2839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 }
2842 COPY_BUF(l,buffer,len,c);
2843 NEXTL(l);
2844 c = CUR_CHAR(l);
2845 }
2846 buffer[len] = 0;
2847 return(buffer);
2848 }
2849 }
2850 if (len == 0)
2851 return(NULL);
2852 return(xmlStrndup(buf, len));
2853}
2854
2855/**
2856 * xmlParseEntityValue:
2857 * @ctxt: an XML parser context
2858 * @orig: if non-NULL store a copy of the original entity value
2859 *
2860 * parse a value for ENTITY declarations
2861 *
2862 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2863 * "'" ([^%&'] | PEReference | Reference)* "'"
2864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002865 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002866 */
2867
2868xmlChar *
2869xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2870 xmlChar *buf = NULL;
2871 int len = 0;
2872 int size = XML_PARSER_BUFFER_SIZE;
2873 int c, l;
2874 xmlChar stop;
2875 xmlChar *ret = NULL;
2876 const xmlChar *cur = NULL;
2877 xmlParserInputPtr input;
2878
2879 if (RAW == '"') stop = '"';
2880 else if (RAW == '\'') stop = '\'';
2881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(NULL);
2884 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890
2891 /*
2892 * The content of the entity definition is copied in a buffer.
2893 */
2894
2895 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2896 input = ctxt->input;
2897 GROW;
2898 NEXT;
2899 c = CUR_CHAR(l);
2900 /*
2901 * NOTE: 4.4.5 Included in Literal
2902 * When a parameter entity reference appears in a literal entity
2903 * value, ... a single or double quote character in the replacement
2904 * text is always treated as a normal data character and will not
2905 * terminate the literal.
2906 * In practice it means we stop the loop only when back at parsing
2907 * the initial entity and the quote is found
2908 */
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 (ctxt->input != input))) {
2911 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlChar *tmp;
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002918 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002919 return(NULL);
2920 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002921 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
2923 COPY_BUF(l,buf,len,c);
2924 NEXTL(l);
2925 /*
2926 * Pop-up of finished entities.
2927 */
2928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2929 xmlPopInput(ctxt);
2930
2931 GROW;
2932 c = CUR_CHAR(l);
2933 if (c == 0) {
2934 GROW;
2935 c = CUR_CHAR(l);
2936 }
2937 }
2938 buf[len] = 0;
2939
2940 /*
2941 * Raise problem w.r.t. '&' and '%' being used in non-entities
2942 * reference constructs. Note Charref will be handled in
2943 * xmlStringDecodeEntities()
2944 */
2945 cur = buf;
2946 while (*cur != 0) { /* non input consuming */
2947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2948 xmlChar *name;
2949 xmlChar tmp = *cur;
2950
2951 cur++;
2952 name = xmlParseStringName(ctxt, &cur);
2953 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002956 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002958 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2959 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 }
2962 if (name != NULL)
2963 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002964 if (*cur == 0)
2965 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 cur++;
2968 }
2969
2970 /*
2971 * Then PEReference entities are substituted.
2972 */
2973 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002974 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002975 xmlFree(buf);
2976 } else {
2977 NEXT;
2978 /*
2979 * NOTE: 4.4.7 Bypassed
2980 * When a general entity reference appears in the EntityValue in
2981 * an entity declaration, it is bypassed and left as is.
2982 * so XML_SUBSTITUTE_REF is not set here.
2983 */
2984 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2985 0, 0, 0);
2986 if (orig != NULL)
2987 *orig = buf;
2988 else
2989 xmlFree(buf);
2990 }
2991
2992 return(ret);
2993}
2994
2995/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 * xmlParseAttValueComplex:
2997 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002998 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * parse a value for an attribute, this is the fallback function
3002 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003003 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003009 xmlChar limit = 0;
3010 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 int len = 0;
3012 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 xmlChar *current = NULL;
3015 xmlEntityPtr ent;
3016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (NXT(0) == '"') {
3018 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3019 limit = '"';
3020 NEXT;
3021 } else if (NXT(0) == '\'') {
3022 limit = '\'';
3023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3024 NEXT;
3025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029
3030 /*
3031 * allocate a translation buffer.
3032 */
3033 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003034 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003036
3037 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003038 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003039 */
3040 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003041 while ((NXT(0) != limit) && /* checked */
3042 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003044 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003045 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (NXT(1) == '#') {
3047 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003050 if (ctxt->replaceEntities) {
3051 if (len > buf_size - 10) {
3052 growBuffer(buf);
3053 }
3054 buf[len++] = '&';
3055 } else {
3056 /*
3057 * The reparsing will be done in xmlStringGetNodeList()
3058 * called by the attribute() function in SAX.c
3059 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003060 if (len > buf_size - 10) {
3061 growBuffer(buf);
3062 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003063 buf[len++] = '&';
3064 buf[len++] = '#';
3065 buf[len++] = '3';
3066 buf[len++] = '8';
3067 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003070 if (len > buf_size - 10) {
3071 growBuffer(buf);
3072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len += xmlCopyChar(0, &buf[len], val);
3074 }
3075 } else {
3076 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003077 if ((ent != NULL) &&
3078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3079 if (len > buf_size - 10) {
3080 growBuffer(buf);
3081 }
3082 if ((ctxt->replaceEntities == 0) &&
3083 (ent->content[0] == '&')) {
3084 buf[len++] = '&';
3085 buf[len++] = '#';
3086 buf[len++] = '3';
3087 buf[len++] = '8';
3088 buf[len++] = ';';
3089 } else {
3090 buf[len++] = ent->content[0];
3091 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 } else if ((ent != NULL) &&
3093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 xmlChar *rep;
3095
3096 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3097 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 XML_SUBSTITUTE_REF,
3099 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (rep != NULL) {
3101 current = rep;
3102 while (*current != 0) { /* non input consuming */
3103 buf[len++] = *current++;
3104 if (len > buf_size - 10) {
3105 growBuffer(buf);
3106 }
3107 }
3108 xmlFree(rep);
3109 }
3110 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (ent->content != NULL)
3115 buf[len++] = ent->content[0];
3116 }
3117 } else if (ent != NULL) {
3118 int i = xmlStrlen(ent->name);
3119 const xmlChar *cur = ent->name;
3120
3121 /*
3122 * This may look absurd but is needed to detect
3123 * entities problems
3124 */
3125 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3126 (ent->content != NULL)) {
3127 xmlChar *rep;
3128 rep = xmlStringDecodeEntities(ctxt, ent->content,
3129 XML_SUBSTITUTE_REF, 0, 0, 0);
3130 if (rep != NULL)
3131 xmlFree(rep);
3132 }
3133
3134 /*
3135 * Just output the reference
3136 */
3137 buf[len++] = '&';
3138 if (len > buf_size - i - 10) {
3139 growBuffer(buf);
3140 }
3141 for (;i > 0;i--)
3142 buf[len++] = *cur++;
3143 buf[len++] = ';';
3144 }
3145 }
3146 } else {
3147 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003148 if ((len != 0) || (!normalize)) {
3149 if ((!normalize) || (!in_space)) {
3150 COPY_BUF(l,buf,len,0x20);
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 }
3155 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 }
3157 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003158 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003159 COPY_BUF(l,buf,len,c);
3160 if (len > buf_size - 10) {
3161 growBuffer(buf);
3162 }
3163 }
3164 NEXTL(l);
3165 }
3166 GROW;
3167 c = CUR_CHAR(l);
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 if ((in_space) && (normalize)) {
3170 while (buf[len - 1] == 0x20) len--;
3171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003172 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003174 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003175 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003176 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3177 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003178 } else
3179 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003180 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003182
3183mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003186}
3187
3188/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003189 * xmlParseAttValue:
3190 * @ctxt: an XML parser context
3191 *
3192 * parse a value for an attribute
3193 * Note: the parser won't do substitution of entities here, this
3194 * will be handled later in xmlStringGetNodeList
3195 *
3196 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3197 * "'" ([^<&'] | Reference)* "'"
3198 *
3199 * 3.3.3 Attribute-Value Normalization:
3200 * Before the value of an attribute is passed to the application or
3201 * checked for validity, the XML processor must normalize it as follows:
3202 * - a character reference is processed by appending the referenced
3203 * character to the attribute value
3204 * - an entity reference is processed by recursively processing the
3205 * replacement text of the entity
3206 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3207 * appending #x20 to the normalized value, except that only a single
3208 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3209 * parsed entity or the literal entity value of an internal parsed entity
3210 * - other characters are processed by appending them to the normalized value
3211 * If the declared value is not CDATA, then the XML processor must further
3212 * process the normalized attribute value by discarding any leading and
3213 * trailing space (#x20) characters, and by replacing sequences of space
3214 * (#x20) characters by a single space (#x20) character.
3215 * All attributes for which no declaration has been read should be treated
3216 * by a non-validating parser as if declared CDATA.
3217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
3220
3221
3222xmlChar *
3223xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003224 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003226}
3227
3228/**
Owen Taylor3473f882001-02-23 17:55:21 +00003229 * xmlParseSystemLiteral:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse an XML Literal
3233 *
3234 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3235 *
3236 * Returns the SystemLiteral parsed or NULL
3237 */
3238
3239xmlChar *
3240xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3241 xmlChar *buf = NULL;
3242 int len = 0;
3243 int size = XML_PARSER_BUFFER_SIZE;
3244 int cur, l;
3245 xmlChar stop;
3246 int state = ctxt->instate;
3247 int count = 0;
3248
3249 SHRINK;
3250 if (RAW == '"') {
3251 NEXT;
3252 stop = '"';
3253 } else if (RAW == '\'') {
3254 NEXT;
3255 stop = '\'';
3256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 return(NULL);
3265 }
3266 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3267 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003268 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003269 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003270 xmlChar *tmp;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003273 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3274 if (tmp == NULL) {
3275 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003276 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003277 ctxt->instate = (xmlParserInputState) state;
3278 return(NULL);
3279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 count++;
3283 if (count > 50) {
3284 GROW;
3285 count = 0;
3286 }
3287 COPY_BUF(l,buf,len,cur);
3288 NEXTL(l);
3289 cur = CUR_CHAR(l);
3290 if (cur == 0) {
3291 GROW;
3292 SHRINK;
3293 cur = CUR_CHAR(l);
3294 }
3295 }
3296 buf[len] = 0;
3297 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003298 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 } else {
3301 NEXT;
3302 }
3303 return(buf);
3304}
3305
3306/**
3307 * xmlParsePubidLiteral:
3308 * @ctxt: an XML parser context
3309 *
3310 * parse an XML public literal
3311 *
3312 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3313 *
3314 * Returns the PubidLiteral parsed or NULL.
3315 */
3316
3317xmlChar *
3318xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3319 xmlChar *buf = NULL;
3320 int len = 0;
3321 int size = XML_PARSER_BUFFER_SIZE;
3322 xmlChar cur;
3323 xmlChar stop;
3324 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003325 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003326
3327 SHRINK;
3328 if (RAW == '"') {
3329 NEXT;
3330 stop = '"';
3331 } else if (RAW == '\'') {
3332 NEXT;
3333 stop = '\'';
3334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 return(NULL);
3337 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003338 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 return(NULL);
3342 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003343 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003345 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlChar *tmp;
3348
Owen Taylor3473f882001-02-23 17:55:21 +00003349 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003352 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return(NULL);
3355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 buf[len++] = cur;
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 NEXT;
3365 cur = CUR;
3366 if (cur == 0) {
3367 GROW;
3368 SHRINK;
3369 cur = CUR;
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 } else {
3376 NEXT;
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(buf);
3380}
3381
Daniel Veillard48b2f892001-02-25 16:11:03 +00003382void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003383
/*
 * Byte classification table used by the inner loop of the character data
 * fast path. An entry is non-zero (equal to its own index) for the bytes
 * that loop may step over directly: TAB (0x09) and 0x20..0x7F except
 * '&', '<' and ']'. Every other byte maps to 0: the remaining control
 * characters (CR and LF included, they are handled separately) and all
 * non-ASCII bytes 0x80..0xFF.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x08 */ 0,    0x09, 0,    0,    0,    0,    0,    0,    /* TAB only */
    /* 0x10 */ 0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x18 */ 0,    0,    0,    0,    0,    0,    0,    0,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0,    0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0,    0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0,    0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 .. 0xFF: non-ASCII bytes, never accepted here */
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
3421
Owen Taylor3473f882001-02-23 17:55:21 +00003422/**
3423 * xmlParseCharData:
3424 * @ctxt: an XML parser context
3425 * @cdata: int indicating whether we are within a CDATA section
3426 *
3427 * parse a CharData section.
3428 * if we are within a CDATA section ']]>' marks an end of section.
3429 *
3430 * The right angle bracket (>) may be represented using the string "&gt;",
3431 * and must, for compatibility, be escaped using "&gt;" or a character
3432 * reference when it appears in the string "]]>" in content, when that
3433 * string is not marking the end of a CDATA section.
3434 *
3435 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3436 */
3437
3438void
3439xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003440 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003442 int line = ctxt->input->line;
3443 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003444 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445
3446 SHRINK;
3447 GROW;
3448 /*
3449 * Accelerated common case where input don't need to be
3450 * modified before passing it to the handler.
3451 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003452 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in = ctxt->input->cur;
3454 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003455get_more_space:
3456 while (*in == 0x20) in++;
3457 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003458 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003459 in++;
3460 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003461 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003462 in++;
3463 }
3464 goto get_more_space;
3465 }
3466 if (*in == '<') {
3467 nbchar = in - ctxt->input->cur;
3468 if (nbchar > 0) {
3469 const xmlChar *tmp = ctxt->input->cur;
3470 ctxt->input->cur = in;
3471
Daniel Veillard34099b42004-11-04 17:34:35 +00003472 if ((ctxt->sax != NULL) &&
3473 (ctxt->sax->ignorableWhitespace !=
3474 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003475 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003476 if (ctxt->sax->ignorableWhitespace != NULL)
3477 ctxt->sax->ignorableWhitespace(ctxt->userData,
3478 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003479 } else if (ctxt->sax->characters != NULL)
3480 ctxt->sax->characters(ctxt->userData,
3481 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003482 } else if ((ctxt->sax != NULL) &&
3483 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003484 ctxt->sax->characters(ctxt->userData,
3485 tmp, nbchar);
3486 }
3487 }
3488 return;
3489 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003490
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003491get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003492 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003493 while (test_char_data[*in]) {
3494 in++;
3495 ccol++;
3496 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003497 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003498 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003499 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003500 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003501 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003502 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003503 in++;
3504 }
3505 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003506 }
3507 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003508 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003509 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003510 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003511 return;
3512 }
3513 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003514 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003515 goto get_more;
3516 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003517 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003518 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003521 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003522 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003523 const xmlChar *tmp = ctxt->input->cur;
3524 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003525
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003526 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003527 if (ctxt->sax->ignorableWhitespace != NULL)
3528 ctxt->sax->ignorableWhitespace(ctxt->userData,
3529 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003530 } else if (ctxt->sax->characters != NULL)
3531 ctxt->sax->characters(ctxt->userData,
3532 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003533 line = ctxt->input->line;
3534 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003535 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003536 if (ctxt->sax->characters != NULL)
3537 ctxt->sax->characters(ctxt->userData,
3538 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003539 line = ctxt->input->line;
3540 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003541 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003542 }
3543 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003544 if (*in == 0xD) {
3545 in++;
3546 if (*in == 0xA) {
3547 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003548 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003549 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003550 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003551 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003552 in--;
3553 }
3554 if (*in == '<') {
3555 return;
3556 }
3557 if (*in == '&') {
3558 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003559 }
3560 SHRINK;
3561 GROW;
3562 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003563 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003564 nbchar = 0;
3565 }
Daniel Veillard50582112001-03-26 22:52:16 +00003566 ctxt->input->line = line;
3567 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003568 xmlParseCharDataComplex(ctxt, cdata);
3569}
3570
Daniel Veillard01c13b52002-12-10 15:19:08 +00003571/**
3572 * xmlParseCharDataComplex:
3573 * @ctxt: an XML parser context
3574 * @cdata: int indicating whether we are within a CDATA section
3575 *
3576 * parse a CharData section.this is the fallback function
3577 * of xmlParseCharData() when the parsing requires handling
3578 * of non-ASCII characters.
3579 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003580void
3581xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003582 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3583 int nbchar = 0;
3584 int cur, l;
3585 int count = 0;
3586
3587 SHRINK;
3588 GROW;
3589 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003590 while ((cur != '<') && /* checked */
3591 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003592 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003593 if ((cur == ']') && (NXT(1) == ']') &&
3594 (NXT(2) == '>')) {
3595 if (cdata) break;
3596 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003597 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003598 }
3599 }
3600 COPY_BUF(l,buf,nbchar,cur);
3601 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003602 buf[nbchar] = 0;
3603
Owen Taylor3473f882001-02-23 17:55:21 +00003604 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003605 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003606 */
3607 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003608 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003609 if (ctxt->sax->ignorableWhitespace != NULL)
3610 ctxt->sax->ignorableWhitespace(ctxt->userData,
3611 buf, nbchar);
3612 } else {
3613 if (ctxt->sax->characters != NULL)
3614 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3615 }
3616 }
3617 nbchar = 0;
3618 }
3619 count++;
3620 if (count > 50) {
3621 GROW;
3622 count = 0;
3623 }
3624 NEXTL(l);
3625 cur = CUR_CHAR(l);
3626 }
3627 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003628 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003629 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003630 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003631 */
3632 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003633 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003634 if (ctxt->sax->ignorableWhitespace != NULL)
3635 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3636 } else {
3637 if (ctxt->sax->characters != NULL)
3638 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3639 }
3640 }
3641 }
3642}
3643
3644/**
3645 * xmlParseExternalID:
3646 * @ctxt: an XML parser context
3647 * @publicID: a xmlChar** receiving PubidLiteral
3648 * @strict: indicate whether we should restrict parsing to only
3649 * production [75], see NOTE below
3650 *
3651 * Parse an External ID or a Public ID
3652 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003653 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003654 * 'PUBLIC' S PubidLiteral S SystemLiteral
3655 *
3656 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3657 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3658 *
3659 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3660 *
3661 * Returns the function returns SystemLiteral and in the second
3662 * case publicID receives PubidLiteral, is strict is off
3663 * it is possible to return NULL and have publicID set.
3664 */
3665
3666xmlChar *
3667xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3668 xmlChar *URI = NULL;
3669
3670 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003671
3672 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003673 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003674 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003675 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003676 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3677 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003678 }
3679 SKIP_BLANKS;
3680 URI = xmlParseSystemLiteral(ctxt);
3681 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003682 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003683 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003684 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003685 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003686 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003688 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003689 }
3690 SKIP_BLANKS;
3691 *publicID = xmlParsePubidLiteral(ctxt);
3692 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003693 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003694 }
3695 if (strict) {
3696 /*
3697 * We don't handle [83] so "S SystemLiteral" is required.
3698 */
William M. Brack76e95df2003-10-18 16:20:14 +00003699 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003701 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003702 }
3703 } else {
3704 /*
3705 * We handle [83] so we return immediately, if
3706 * "S SystemLiteral" is not detected. From a purely parsing
3707 * point of view that's a nice mess.
3708 */
3709 const xmlChar *ptr;
3710 GROW;
3711
3712 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003713 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003714
William M. Brack76e95df2003-10-18 16:20:14 +00003715 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003716 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3717 }
3718 SKIP_BLANKS;
3719 URI = xmlParseSystemLiteral(ctxt);
3720 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003721 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723 }
3724 return(URI);
3725}
3726
3727/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003728 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003729 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003730 * @buf: the already parsed part of the buffer
3731 * @len: number of bytes filles in the buffer
3732 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003733 *
3734 * Skip an XML (SGML) comment <!-- .... -->
3735 * The spec says that "For compatibility, the string "--" (double-hyphen)
3736 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003737 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003738 *
3739 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3740 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003741static void
3742xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003743 int q, ql;
3744 int r, rl;
3745 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 xmlParserInputPtr input = ctxt->input;
3747 int count = 0;
3748
Owen Taylor3473f882001-02-23 17:55:21 +00003749 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003750 len = 0;
3751 size = XML_PARSER_BUFFER_SIZE;
3752 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3753 if (buf == NULL) {
3754 xmlErrMemory(ctxt, NULL);
3755 return;
3756 }
Owen Taylor3473f882001-02-23 17:55:21 +00003757 }
3758 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003759 if (q == 0)
3760 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 NEXTL(ql);
3762 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003763 if (r == 0)
3764 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003765 NEXTL(rl);
3766 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003767 if (cur == 0)
3768 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003769 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003770 ((cur != '>') ||
3771 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003772 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003773 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003774 }
3775 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003776 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003777 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003778 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3779 if (new_buf == NULL) {
3780 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 return;
3783 }
William M. Bracka3215c72004-07-31 16:24:01 +00003784 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003785 }
3786 COPY_BUF(ql,buf,len,q);
3787 q = r;
3788 ql = rl;
3789 r = cur;
3790 rl = l;
3791
3792 count++;
3793 if (count > 50) {
3794 GROW;
3795 count = 0;
3796 }
3797 NEXTL(l);
3798 cur = CUR_CHAR(l);
3799 if (cur == 0) {
3800 SHRINK;
3801 GROW;
3802 cur = CUR_CHAR(l);
3803 }
3804 }
3805 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003806 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003807 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003808 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 xmlFree(buf);
3810 } else {
3811 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3813 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 NEXT;
3816 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3817 (!ctxt->disableSAX))
3818 ctxt->sax->comment(ctxt->userData, buf);
3819 xmlFree(buf);
3820 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003821 return;
3822not_terminated:
3823 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3824 "Comment not terminated\n", NULL);
3825 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003826}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003827/**
3828 * xmlParseComment:
3829 * @ctxt: an XML parser context
3830 *
3831 * Skip an XML (SGML) comment <!-- .... -->
3832 * The spec says that "For compatibility, the string "--" (double-hyphen)
3833 * must not occur within comments. "
3834 *
3835 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3836 */
3837void
3838xmlParseComment(xmlParserCtxtPtr ctxt) {
3839 xmlChar *buf = NULL;
3840 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003841 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003842 xmlParserInputState state;
3843 const xmlChar *in;
3844 int nbchar = 0, ccol;
3845
3846 /*
3847 * Check that there is a comment right here.
3848 */
3849 if ((RAW != '<') || (NXT(1) != '!') ||
3850 (NXT(2) != '-') || (NXT(3) != '-')) return;
3851
3852 state = ctxt->instate;
3853 ctxt->instate = XML_PARSER_COMMENT;
3854 SKIP(4);
3855 SHRINK;
3856 GROW;
3857
3858 /*
3859 * Accelerated common case where input don't need to be
3860 * modified before passing it to the handler.
3861 */
3862 in = ctxt->input->cur;
3863 do {
3864 if (*in == 0xA) {
3865 ctxt->input->line++; ctxt->input->col = 1;
3866 in++;
3867 while (*in == 0xA) {
3868 ctxt->input->line++; ctxt->input->col = 1;
3869 in++;
3870 }
3871 }
3872get_more:
3873 ccol = ctxt->input->col;
3874 while (((*in > '-') && (*in <= 0x7F)) ||
3875 ((*in >= 0x20) && (*in < '-')) ||
3876 (*in == 0x09)) {
3877 in++;
3878 ccol++;
3879 }
3880 ctxt->input->col = ccol;
3881 if (*in == 0xA) {
3882 ctxt->input->line++; ctxt->input->col = 1;
3883 in++;
3884 while (*in == 0xA) {
3885 ctxt->input->line++; ctxt->input->col = 1;
3886 in++;
3887 }
3888 goto get_more;
3889 }
3890 nbchar = in - ctxt->input->cur;
3891 /*
3892 * save current set of data
3893 */
3894 if (nbchar > 0) {
3895 if ((ctxt->sax != NULL) &&
3896 (ctxt->sax->comment != NULL)) {
3897 if (buf == NULL) {
3898 if ((*in == '-') && (in[1] == '-'))
3899 size = nbchar + 1;
3900 else
3901 size = XML_PARSER_BUFFER_SIZE + nbchar;
3902 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3903 if (buf == NULL) {
3904 xmlErrMemory(ctxt, NULL);
3905 ctxt->instate = state;
3906 return;
3907 }
3908 len = 0;
3909 } else if (len + nbchar + 1 >= size) {
3910 xmlChar *new_buf;
3911 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3912 new_buf = (xmlChar *) xmlRealloc(buf,
3913 size * sizeof(xmlChar));
3914 if (new_buf == NULL) {
3915 xmlFree (buf);
3916 xmlErrMemory(ctxt, NULL);
3917 ctxt->instate = state;
3918 return;
3919 }
3920 buf = new_buf;
3921 }
3922 memcpy(&buf[len], ctxt->input->cur, nbchar);
3923 len += nbchar;
3924 buf[len] = 0;
3925 }
3926 }
3927 ctxt->input->cur = in;
3928 if (*in == 0xA)
3929
3930 if (*in == 0xD) {
3931 in++;
3932 if (*in == 0xA) {
3933 ctxt->input->cur = in;
3934 in++;
3935 ctxt->input->line++; ctxt->input->col = 1;
3936 continue; /* while */
3937 }
3938 in--;
3939 }
3940 SHRINK;
3941 GROW;
3942 in = ctxt->input->cur;
3943 if (*in == '-') {
3944 if (in[1] == '-') {
3945 if (in[2] == '>') {
3946 SKIP(3);
3947 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3948 (!ctxt->disableSAX)) {
3949 if (buf != NULL)
3950 ctxt->sax->comment(ctxt->userData, buf);
3951 else
3952 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3953 }
3954 if (buf != NULL)
3955 xmlFree(buf);
3956 ctxt->instate = state;
3957 return;
3958 }
3959 if (buf != NULL)
3960 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3961 "Comment not terminated \n<!--%.50s\n",
3962 buf);
3963 else
3964 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3965 "Comment not terminated \n", NULL);
3966 in++;
3967 ctxt->input->col++;
3968 }
3969 in++;
3970 ctxt->input->col++;
3971 goto get_more;
3972 }
3973 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3974 xmlParseCommentComplex(ctxt, buf, len, size);
3975 ctxt->instate = state;
3976 return;
3977}
3978
Owen Taylor3473f882001-02-23 17:55:21 +00003979
3980/**
3981 * xmlParsePITarget:
3982 * @ctxt: an XML parser context
3983 *
3984 * parse the name of a PI
3985 *
3986 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3987 *
3988 * Returns the PITarget name or NULL
3989 */
3990
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003991const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003992xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003993 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003994
3995 name = xmlParseName(ctxt);
3996 if ((name != NULL) &&
3997 ((name[0] == 'x') || (name[0] == 'X')) &&
3998 ((name[1] == 'm') || (name[1] == 'M')) &&
3999 ((name[2] == 'l') || (name[2] == 'L'))) {
4000 int i;
4001 if ((name[0] == 'x') && (name[1] == 'm') &&
4002 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004003 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004004 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004005 return(name);
4006 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004007 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004008 return(name);
4009 }
4010 for (i = 0;;i++) {
4011 if (xmlW3CPIs[i] == NULL) break;
4012 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4013 return(name);
4014 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004015 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4016 "xmlParsePITarget: invalid name prefix 'xml'\n",
4017 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004018 }
4019 return(name);
4020}
4021
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction:
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the pseudo-attribute "catalog", an '=' sign
 * and a value quoted with ' or ", with optional blanks around each part
 * and nothing but blanks after the closing quote.  On success the value
 * is added to ctxt->catalogs through xmlCatalogAddLocal().  A missing
 * '=' makes the function return silently; any other syntax problem
 * raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* '=' sign, its absence is not reported */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* value, up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4083
Owen Taylor3473f882001-02-23 17:55:21 +00004084/**
4085 * xmlParsePI:
4086 * @ctxt: an XML parser context
4087 *
4088 * parse an XML Processing Instruction.
4089 *
4090 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4091 *
4092 * The processing is transfered to SAX once parsed.
4093 */
4094
4095void
4096xmlParsePI(xmlParserCtxtPtr ctxt) {
4097 xmlChar *buf = NULL;
4098 int len = 0;
4099 int size = XML_PARSER_BUFFER_SIZE;
4100 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004101 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004102 xmlParserInputState state;
4103 int count = 0;
4104
4105 if ((RAW == '<') && (NXT(1) == '?')) {
4106 xmlParserInputPtr input = ctxt->input;
4107 state = ctxt->instate;
4108 ctxt->instate = XML_PARSER_PI;
4109 /*
4110 * this is a Processing Instruction.
4111 */
4112 SKIP(2);
4113 SHRINK;
4114
4115 /*
4116 * Parse the target name and check for special support like
4117 * namespace.
4118 */
4119 target = xmlParsePITarget(ctxt);
4120 if (target != NULL) {
4121 if ((RAW == '?') && (NXT(1) == '>')) {
4122 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004123 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4124 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004125 }
4126 SKIP(2);
4127
4128 /*
4129 * SAX: PI detected.
4130 */
4131 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4132 (ctxt->sax->processingInstruction != NULL))
4133 ctxt->sax->processingInstruction(ctxt->userData,
4134 target, NULL);
4135 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004136 return;
4137 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004138 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004139 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004140 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004141 ctxt->instate = state;
4142 return;
4143 }
4144 cur = CUR;
4145 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004146 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4147 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004148 }
4149 SKIP_BLANKS;
4150 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004151 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004152 ((cur != '?') || (NXT(1) != '>'))) {
4153 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004154 xmlChar *tmp;
4155
Owen Taylor3473f882001-02-23 17:55:21 +00004156 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004157 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4158 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004159 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004160 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004161 ctxt->instate = state;
4162 return;
4163 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004164 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004165 }
4166 count++;
4167 if (count > 50) {
4168 GROW;
4169 count = 0;
4170 }
4171 COPY_BUF(l,buf,len,cur);
4172 NEXTL(l);
4173 cur = CUR_CHAR(l);
4174 if (cur == 0) {
4175 SHRINK;
4176 GROW;
4177 cur = CUR_CHAR(l);
4178 }
4179 }
4180 buf[len] = 0;
4181 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004182 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4183 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 } else {
4185 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4187 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004188 }
4189 SKIP(2);
4190
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004191#ifdef LIBXML_CATALOG_ENABLED
4192 if (((state == XML_PARSER_MISC) ||
4193 (state == XML_PARSER_START)) &&
4194 (xmlStrEqual(target, XML_CATALOG_PI))) {
4195 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4196 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4197 (allow == XML_CATA_ALLOW_ALL))
4198 xmlParseCatalogPI(ctxt, buf);
4199 }
4200#endif
4201
4202
Owen Taylor3473f882001-02-23 17:55:21 +00004203 /*
4204 * SAX: PI detected.
4205 */
4206 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4207 (ctxt->sax->processingInstruction != NULL))
4208 ctxt->sax->processingInstruction(ctxt->userData,
4209 target, buf);
4210 }
4211 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004213 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004214 }
4215 ctxt->instate = state;
4216 }
4217}
4218
4219/**
4220 * xmlParseNotationDecl:
4221 * @ctxt: an XML parser context
4222 *
4223 * parse a notation declaration
4224 *
4225 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4226 *
4227 * Hence there is actually 3 choices:
4228 * 'PUBLIC' S PubidLiteral
4229 * 'PUBLIC' S PubidLiteral S SystemLiteral
4230 * and 'SYSTEM' S SystemLiteral
4231 *
4232 * See the NOTE on xmlParseExternalID().
4233 */
4234
4235void
4236xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004237 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004238 xmlChar *Pubid;
4239 xmlChar *Systemid;
4240
Daniel Veillarda07050d2003-10-19 14:46:32 +00004241 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004242 xmlParserInputPtr input = ctxt->input;
4243 SHRINK;
4244 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004245 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004246 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4247 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004248 return;
4249 }
4250 SKIP_BLANKS;
4251
Daniel Veillard76d66f42001-05-16 21:05:17 +00004252 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004253 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004254 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004255 return;
4256 }
William M. Brack76e95df2003-10-18 16:20:14 +00004257 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004258 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004259 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004260 return;
4261 }
4262 SKIP_BLANKS;
4263
4264 /*
4265 * Parse the IDs.
4266 */
4267 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4268 SKIP_BLANKS;
4269
4270 if (RAW == '>') {
4271 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004272 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4273 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 NEXT;
4276 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4277 (ctxt->sax->notationDecl != NULL))
4278 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4279 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004280 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282 if (Systemid != NULL) xmlFree(Systemid);
4283 if (Pubid != NULL) xmlFree(Pubid);
4284 }
4285}
4286
4287/**
4288 * xmlParseEntityDecl:
4289 * @ctxt: an XML parser context
4290 *
4291 * parse <!ENTITY declarations
4292 *
4293 * [70] EntityDecl ::= GEDecl | PEDecl
4294 *
4295 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4296 *
4297 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4298 *
4299 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4300 *
4301 * [74] PEDef ::= EntityValue | ExternalID
4302 *
4303 * [76] NDataDecl ::= S 'NDATA' S Name
4304 *
4305 * [ VC: Notation Declared ]
4306 * The Name must match the declared name of a notation.
4307 */
4308
4309void
4310xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004311 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004312 xmlChar *value = NULL;
4313 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004314 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004315 int isParameter = 0;
4316 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004317 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004318
Daniel Veillard4c778d82005-01-23 17:37:44 +00004319 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004320 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004321 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004322 SHRINK;
4323 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004324 skipped = SKIP_BLANKS;
4325 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004326 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4327 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004328 }
Owen Taylor3473f882001-02-23 17:55:21 +00004329
4330 if (RAW == '%') {
4331 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004332 skipped = SKIP_BLANKS;
4333 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4335 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004336 }
Owen Taylor3473f882001-02-23 17:55:21 +00004337 isParameter = 1;
4338 }
4339
Daniel Veillard76d66f42001-05-16 21:05:17 +00004340 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004341 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004342 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4343 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004344 return;
4345 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004346 skipped = SKIP_BLANKS;
4347 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4349 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004350 }
Owen Taylor3473f882001-02-23 17:55:21 +00004351
Daniel Veillardf5582f12002-06-11 10:08:16 +00004352 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004353 /*
4354 * handle the various case of definitions...
4355 */
4356 if (isParameter) {
4357 if ((RAW == '"') || (RAW == '\'')) {
4358 value = xmlParseEntityValue(ctxt, &orig);
4359 if (value) {
4360 if ((ctxt->sax != NULL) &&
4361 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4362 ctxt->sax->entityDecl(ctxt->userData, name,
4363 XML_INTERNAL_PARAMETER_ENTITY,
4364 NULL, NULL, value);
4365 }
4366 } else {
4367 URI = xmlParseExternalID(ctxt, &literal, 1);
4368 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004369 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004370 }
4371 if (URI) {
4372 xmlURIPtr uri;
4373
4374 uri = xmlParseURI((const char *) URI);
4375 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004376 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4377 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004378 /*
4379 * This really ought to be a well formedness error
4380 * but the XML Core WG decided otherwise c.f. issue
4381 * E26 of the XML erratas.
4382 */
Owen Taylor3473f882001-02-23 17:55:21 +00004383 } else {
4384 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004385 /*
4386 * Okay this is foolish to block those but not
4387 * invalid URIs.
4388 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004389 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 } else {
4391 if ((ctxt->sax != NULL) &&
4392 (!ctxt->disableSAX) &&
4393 (ctxt->sax->entityDecl != NULL))
4394 ctxt->sax->entityDecl(ctxt->userData, name,
4395 XML_EXTERNAL_PARAMETER_ENTITY,
4396 literal, URI, NULL);
4397 }
4398 xmlFreeURI(uri);
4399 }
4400 }
4401 }
4402 } else {
4403 if ((RAW == '"') || (RAW == '\'')) {
4404 value = xmlParseEntityValue(ctxt, &orig);
4405 if ((ctxt->sax != NULL) &&
4406 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4407 ctxt->sax->entityDecl(ctxt->userData, name,
4408 XML_INTERNAL_GENERAL_ENTITY,
4409 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004410 /*
4411 * For expat compatibility in SAX mode.
4412 */
4413 if ((ctxt->myDoc == NULL) ||
4414 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4415 if (ctxt->myDoc == NULL) {
4416 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4417 }
4418 if (ctxt->myDoc->intSubset == NULL)
4419 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4420 BAD_CAST "fake", NULL, NULL);
4421
Daniel Veillard1af9a412003-08-20 22:54:39 +00004422 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4423 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004424 }
Owen Taylor3473f882001-02-23 17:55:21 +00004425 } else {
4426 URI = xmlParseExternalID(ctxt, &literal, 1);
4427 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004428 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004429 }
4430 if (URI) {
4431 xmlURIPtr uri;
4432
4433 uri = xmlParseURI((const char *)URI);
4434 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004435 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4436 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004437 /*
4438 * This really ought to be a well formedness error
4439 * but the XML Core WG decided otherwise c.f. issue
4440 * E26 of the XML erratas.
4441 */
Owen Taylor3473f882001-02-23 17:55:21 +00004442 } else {
4443 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004444 /*
4445 * Okay this is foolish to block those but not
4446 * invalid URIs.
4447 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004448 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004449 }
4450 xmlFreeURI(uri);
4451 }
4452 }
William M. Brack76e95df2003-10-18 16:20:14 +00004453 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4455 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004456 }
4457 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004458 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004459 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004460 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004461 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4462 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004463 }
4464 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004465 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004466 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4467 (ctxt->sax->unparsedEntityDecl != NULL))
4468 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4469 literal, URI, ndata);
4470 } else {
4471 if ((ctxt->sax != NULL) &&
4472 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4473 ctxt->sax->entityDecl(ctxt->userData, name,
4474 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4475 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004476 /*
4477 * For expat compatibility in SAX mode.
4478 * assuming the entity repalcement was asked for
4479 */
4480 if ((ctxt->replaceEntities != 0) &&
4481 ((ctxt->myDoc == NULL) ||
4482 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4483 if (ctxt->myDoc == NULL) {
4484 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4485 }
4486
4487 if (ctxt->myDoc->intSubset == NULL)
4488 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4489 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004490 xmlSAX2EntityDecl(ctxt, name,
4491 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4492 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004493 }
Owen Taylor3473f882001-02-23 17:55:21 +00004494 }
4495 }
4496 }
4497 SKIP_BLANKS;
4498 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004499 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004500 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004501 } else {
4502 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004503 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4504 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004505 }
4506 NEXT;
4507 }
4508 if (orig != NULL) {
4509 /*
4510 * Ugly mechanism to save the raw entity value.
4511 */
4512 xmlEntityPtr cur = NULL;
4513
4514 if (isParameter) {
4515 if ((ctxt->sax != NULL) &&
4516 (ctxt->sax->getParameterEntity != NULL))
4517 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4518 } else {
4519 if ((ctxt->sax != NULL) &&
4520 (ctxt->sax->getEntity != NULL))
4521 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004522 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004523 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004524 }
Owen Taylor3473f882001-02-23 17:55:21 +00004525 }
4526 if (cur != NULL) {
4527 if (cur->orig != NULL)
4528 xmlFree(orig);
4529 else
4530 cur->orig = orig;
4531 } else
4532 xmlFree(orig);
4533 }
Owen Taylor3473f882001-02-23 17:55:21 +00004534 if (value != NULL) xmlFree(value);
4535 if (URI != NULL) xmlFree(URI);
4536 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004537 }
4538}
4539
4540/**
4541 * xmlParseDefaultDecl:
4542 * @ctxt: an XML parser context
4543 * @value: Receive a possible fixed default value for the attribute
4544 *
4545 * Parse an attribute default declaration
4546 *
4547 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4548 *
4549 * [ VC: Required Attribute ]
4550 * if the default declaration is the keyword #REQUIRED, then the
4551 * attribute must be specified for all elements of the type in the
4552 * attribute-list declaration.
4553 *
4554 * [ VC: Attribute Default Legal ]
4555 * The declared default value must meet the lexical constraints of
4556 * the declared attribute type c.f. xmlValidateAttributeDecl()
4557 *
4558 * [ VC: Fixed Attribute Default ]
4559 * if an attribute has a default value declared with the #FIXED
4560 * keyword, instances of that attribute must match the default value.
4561 *
4562 * [ WFC: No < in Attribute Values ]
4563 * handled in xmlParseAttValue()
4564 *
4565 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4566 * or XML_ATTRIBUTE_FIXED.
4567 */
4568
4569int
4570xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4571 int val;
4572 xmlChar *ret;
4573
4574 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004575 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004576 SKIP(9);
4577 return(XML_ATTRIBUTE_REQUIRED);
4578 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004579 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004580 SKIP(8);
4581 return(XML_ATTRIBUTE_IMPLIED);
4582 }
4583 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004584 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004585 SKIP(6);
4586 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004587 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004588 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4589 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004590 }
4591 SKIP_BLANKS;
4592 }
4593 ret = xmlParseAttValue(ctxt);
4594 ctxt->instate = XML_PARSER_DTD;
4595 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004596 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004597 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004598 } else
4599 *value = ret;
4600 return(val);
4601}
4602
4603/**
4604 * xmlParseNotationType:
4605 * @ctxt: an XML parser context
4606 *
4607 * parse an Notation attribute type.
4608 *
4609 * Note: the leading 'NOTATION' S part has already being parsed...
4610 *
4611 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4612 *
4613 * [ VC: Notation Attributes ]
4614 * Values of this type must match one of the notation names included
4615 * in the declaration; all notation names in the declaration must be declared.
4616 *
4617 * Returns: the notation attribute tree built while parsing
4618 */
4619
4620xmlEnumerationPtr
4621xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004622 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004623 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4624
4625 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004626 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004627 return(NULL);
4628 }
4629 SHRINK;
4630 do {
4631 NEXT;
4632 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004633 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004634 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4636 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004637 return(ret);
4638 }
4639 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004640 if (cur == NULL) return(ret);
4641 if (last == NULL) ret = last = cur;
4642 else {
4643 last->next = cur;
4644 last = cur;
4645 }
4646 SKIP_BLANKS;
4647 } while (RAW == '|');
4648 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004649 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004650 if ((last != NULL) && (last != ret))
4651 xmlFreeEnumeration(last);
4652 return(ret);
4653 }
4654 NEXT;
4655 return(ret);
4656}
4657
4658/**
4659 * xmlParseEnumerationType:
4660 * @ctxt: an XML parser context
4661 *
4662 * parse an Enumeration attribute type.
4663 *
4664 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4665 *
4666 * [ VC: Enumeration ]
4667 * Values of this type must match one of the Nmtoken tokens in
4668 * the declaration
4669 *
4670 * Returns: the enumeration attribute tree built while parsing
4671 */
4672
4673xmlEnumerationPtr
4674xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4675 xmlChar *name;
4676 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4677
4678 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004679 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004680 return(NULL);
4681 }
4682 SHRINK;
4683 do {
4684 NEXT;
4685 SKIP_BLANKS;
4686 name = xmlParseNmtoken(ctxt);
4687 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004688 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004689 return(ret);
4690 }
4691 cur = xmlCreateEnumeration(name);
4692 xmlFree(name);
4693 if (cur == NULL) return(ret);
4694 if (last == NULL) ret = last = cur;
4695 else {
4696 last->next = cur;
4697 last = cur;
4698 }
4699 SKIP_BLANKS;
4700 } while (RAW == '|');
4701 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004702 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004703 return(ret);
4704 }
4705 NEXT;
4706 return(ret);
4707}
4708
4709/**
4710 * xmlParseEnumeratedType:
4711 * @ctxt: an XML parser context
4712 * @tree: the enumeration tree built while parsing
4713 *
4714 * parse an Enumerated attribute type.
4715 *
4716 * [57] EnumeratedType ::= NotationType | Enumeration
4717 *
4718 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4719 *
4720 *
4721 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4722 */
4723
4724int
4725xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004726 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004727 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4730 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004731 return(0);
4732 }
4733 SKIP_BLANKS;
4734 *tree = xmlParseNotationType(ctxt);
4735 if (*tree == NULL) return(0);
4736 return(XML_ATTRIBUTE_NOTATION);
4737 }
4738 *tree = xmlParseEnumerationType(ctxt);
4739 if (*tree == NULL) return(0);
4740 return(XML_ATTRIBUTE_ENUMERATION);
4741}
4742
4743/**
4744 * xmlParseAttributeType:
4745 * @ctxt: an XML parser context
4746 * @tree: the enumeration tree built while parsing
4747 *
4748 * parse the Attribute list def for an element
4749 *
4750 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4751 *
4752 * [55] StringType ::= 'CDATA'
4753 *
4754 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4755 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4756 *
4757 * Validity constraints for attribute values syntax are checked in
4758 * xmlValidateAttributeValue()
4759 *
4760 * [ VC: ID ]
4761 * Values of type ID must match the Name production. A name must not
4762 * appear more than once in an XML document as a value of this type;
4763 * i.e., ID values must uniquely identify the elements which bear them.
4764 *
4765 * [ VC: One ID per Element Type ]
4766 * No element type may have more than one ID attribute specified.
4767 *
4768 * [ VC: ID Attribute Default ]
4769 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4770 *
4771 * [ VC: IDREF ]
4772 * Values of type IDREF must match the Name production, and values
4773 * of type IDREFS must match Names; each IDREF Name must match the value
4774 * of an ID attribute on some element in the XML document; i.e. IDREF
4775 * values must match the value of some ID attribute.
4776 *
4777 * [ VC: Entity Name ]
4778 * Values of type ENTITY must match the Name production, values
4779 * of type ENTITIES must match Names; each Entity Name must match the
4780 * name of an unparsed entity declared in the DTD.
4781 *
4782 * [ VC: Name Token ]
4783 * Values of type NMTOKEN must match the Nmtoken production; values
4784 * of type NMTOKENS must match Nmtokens.
4785 *
4786 * Returns the attribute type
4787 */
4788int
4789xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4790 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004791 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004792 SKIP(5);
4793 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004794 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004795 SKIP(6);
4796 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004797 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004798 SKIP(5);
4799 return(XML_ATTRIBUTE_IDREF);
4800 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4801 SKIP(2);
4802 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004803 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004804 SKIP(6);
4805 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004806 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004807 SKIP(8);
4808 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004809 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004810 SKIP(8);
4811 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004812 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004813 SKIP(7);
4814 return(XML_ATTRIBUTE_NMTOKEN);
4815 }
4816 return(xmlParseEnumeratedType(ctxt, tree));
4817}
4818
4819/**
4820 * xmlParseAttributeListDecl:
4821 * @ctxt: an XML parser context
4822 *
4823 * : parse the Attribute list def for an element
4824 *
4825 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4826 *
4827 * [53] AttDef ::= S Name S AttType S DefaultDecl
4828 *
4829 */
4830void
4831xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004832 const xmlChar *elemName;
4833 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004834 xmlEnumerationPtr tree;
4835
Daniel Veillarda07050d2003-10-19 14:46:32 +00004836 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004837 xmlParserInputPtr input = ctxt->input;
4838
4839 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004840 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004841 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004842 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004843 }
4844 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004845 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004846 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4848 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 return;
4850 }
4851 SKIP_BLANKS;
4852 GROW;
4853 while (RAW != '>') {
4854 const xmlChar *check = CUR_PTR;
4855 int type;
4856 int def;
4857 xmlChar *defaultValue = NULL;
4858
4859 GROW;
4860 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004861 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004862 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004863 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4864 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004865 break;
4866 }
4867 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004868 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004870 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004871 if (defaultValue != NULL)
4872 xmlFree(defaultValue);
4873 break;
4874 }
4875 SKIP_BLANKS;
4876
4877 type = xmlParseAttributeType(ctxt, &tree);
4878 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004879 if (defaultValue != NULL)
4880 xmlFree(defaultValue);
4881 break;
4882 }
4883
4884 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004885 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004886 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4887 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004888 if (defaultValue != NULL)
4889 xmlFree(defaultValue);
4890 if (tree != NULL)
4891 xmlFreeEnumeration(tree);
4892 break;
4893 }
4894 SKIP_BLANKS;
4895
4896 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4897 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004898 if (defaultValue != NULL)
4899 xmlFree(defaultValue);
4900 if (tree != NULL)
4901 xmlFreeEnumeration(tree);
4902 break;
4903 }
4904
4905 GROW;
4906 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004907 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004908 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004909 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004910 if (defaultValue != NULL)
4911 xmlFree(defaultValue);
4912 if (tree != NULL)
4913 xmlFreeEnumeration(tree);
4914 break;
4915 }
4916 SKIP_BLANKS;
4917 }
4918 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004919 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4920 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004921 if (defaultValue != NULL)
4922 xmlFree(defaultValue);
4923 if (tree != NULL)
4924 xmlFreeEnumeration(tree);
4925 break;
4926 }
4927 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4928 (ctxt->sax->attributeDecl != NULL))
4929 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4930 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004931 else if (tree != NULL)
4932 xmlFreeEnumeration(tree);
4933
4934 if ((ctxt->sax2) && (defaultValue != NULL) &&
4935 (def != XML_ATTRIBUTE_IMPLIED) &&
4936 (def != XML_ATTRIBUTE_REQUIRED)) {
4937 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4938 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004939 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4940 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4941 }
Owen Taylor3473f882001-02-23 17:55:21 +00004942 if (defaultValue != NULL)
4943 xmlFree(defaultValue);
4944 GROW;
4945 }
4946 if (RAW == '>') {
4947 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004948 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4949 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004950 }
4951 NEXT;
4952 }
Owen Taylor3473f882001-02-23 17:55:21 +00004953 }
4954}
4955
4956/**
4957 * xmlParseElementMixedContentDecl:
4958 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004959 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004960 *
4961 * parse the declaration for a Mixed Element content
4962 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4963 *
4964 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4965 * '(' S? '#PCDATA' S? ')'
4966 *
4967 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4968 *
4969 * [ VC: No Duplicate Types ]
4970 * The same name must not appear more than once in a single
4971 * mixed-content declaration.
4972 *
4973 * returns: the list of the xmlElementContentPtr describing the element choices
4974 */
4975xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004976xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004977 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004978 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004979
4980 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004981 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004982 SKIP(7);
4983 SKIP_BLANKS;
4984 SHRINK;
4985 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004986 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004987 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4988"Element content declaration doesn't start and stop in the same entity\n",
4989 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004990 }
Owen Taylor3473f882001-02-23 17:55:21 +00004991 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004992 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 if (RAW == '*') {
4994 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4995 NEXT;
4996 }
4997 return(ret);
4998 }
4999 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005000 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005001 if (ret == NULL) return(NULL);
5002 }
5003 while (RAW == '|') {
5004 NEXT;
5005 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005006 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005007 if (ret == NULL) return(NULL);
5008 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005009 if (cur != NULL)
5010 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005011 cur = ret;
5012 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005013 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005014 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005015 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005016 if (n->c1 != NULL)
5017 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005018 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005019 if (n != NULL)
5020 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005021 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005022 }
5023 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005024 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005025 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005026 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005027 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005028 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005029 return(NULL);
5030 }
5031 SKIP_BLANKS;
5032 GROW;
5033 }
5034 if ((RAW == ')') && (NXT(1) == '*')) {
5035 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005036 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005037 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005038 if (cur->c2 != NULL)
5039 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005040 }
5041 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005042 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005043 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5044"Element content declaration doesn't start and stop in the same entity\n",
5045 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005046 }
Owen Taylor3473f882001-02-23 17:55:21 +00005047 SKIP(2);
5048 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005049 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005050 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005051 return(NULL);
5052 }
5053
5054 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005055 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005056 }
5057 return(ret);
5058}
5059
5060/**
5061 * xmlParseElementChildrenContentDecl:
5062 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005063 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005064 *
5065 * parse the declaration for a Mixed Element content
5066 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5067 *
5068 *
5069 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5070 *
5071 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5072 *
5073 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5074 *
5075 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5076 *
5077 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5078 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005079 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005080 * opening or closing parentheses in a choice, seq, or Mixed
5081 * construct is contained in the replacement text for a parameter
5082 * entity, both must be contained in the same replacement text. For
5083 * interoperability, if a parameter-entity reference appears in a
5084 * choice, seq, or Mixed construct, its replacement text should not
5085 * be empty, and neither the first nor last non-blank character of
5086 * the replacement text should be a connector (| or ,).
5087 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005088 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005089 * hierarchy.
5090 */
5091xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005092xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005093 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005094 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005095 xmlChar type = 0;
5096
5097 SKIP_BLANKS;
5098 GROW;
5099 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005100 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005101
Owen Taylor3473f882001-02-23 17:55:21 +00005102 /* Recurse on first child */
5103 NEXT;
5104 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005105 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005106 SKIP_BLANKS;
5107 GROW;
5108 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005109 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005110 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005111 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005112 return(NULL);
5113 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005114 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005115 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005116 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005117 return(NULL);
5118 }
Owen Taylor3473f882001-02-23 17:55:21 +00005119 GROW;
5120 if (RAW == '?') {
5121 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5122 NEXT;
5123 } else if (RAW == '*') {
5124 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5125 NEXT;
5126 } else if (RAW == '+') {
5127 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5128 NEXT;
5129 } else {
5130 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5131 }
Owen Taylor3473f882001-02-23 17:55:21 +00005132 GROW;
5133 }
5134 SKIP_BLANKS;
5135 SHRINK;
5136 while (RAW != ')') {
5137 /*
5138 * Each loop we parse one separator and one element.
5139 */
5140 if (RAW == ',') {
5141 if (type == 0) type = CUR;
5142
5143 /*
5144 * Detect "Name | Name , Name" error
5145 */
5146 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005147 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005148 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005149 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005150 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005151 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005152 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005153 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005154 return(NULL);
5155 }
5156 NEXT;
5157
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005158 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005159 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005160 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005161 xmlFreeDocElementContent(ctxt->myDoc, last);
5162 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 return(NULL);
5164 }
5165 if (last == NULL) {
5166 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005167 if (ret != NULL)
5168 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005169 ret = cur = op;
5170 } else {
5171 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005172 if (op != NULL)
5173 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005174 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005175 if (last != NULL)
5176 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005177 cur =op;
5178 last = NULL;
5179 }
5180 } else if (RAW == '|') {
5181 if (type == 0) type = CUR;
5182
5183 /*
5184 * Detect "Name , Name | Name" error
5185 */
5186 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005187 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005188 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005189 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005190 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005191 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005192 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005193 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005194 return(NULL);
5195 }
5196 NEXT;
5197
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005198 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005199 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005200 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005201 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005203 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005204 return(NULL);
5205 }
5206 if (last == NULL) {
5207 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005208 if (ret != NULL)
5209 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005210 ret = cur = op;
5211 } else {
5212 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005213 if (op != NULL)
5214 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005215 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005216 if (last != NULL)
5217 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005218 cur =op;
5219 last = NULL;
5220 }
5221 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005222 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005224 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005225 return(NULL);
5226 }
5227 GROW;
5228 SKIP_BLANKS;
5229 GROW;
5230 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005231 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005232 /* Recurse on second child */
5233 NEXT;
5234 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005235 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005236 SKIP_BLANKS;
5237 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005238 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005239 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005240 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005242 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005243 return(NULL);
5244 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005245 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005246 if (RAW == '?') {
5247 last->ocur = XML_ELEMENT_CONTENT_OPT;
5248 NEXT;
5249 } else if (RAW == '*') {
5250 last->ocur = XML_ELEMENT_CONTENT_MULT;
5251 NEXT;
5252 } else if (RAW == '+') {
5253 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5254 NEXT;
5255 } else {
5256 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5257 }
5258 }
5259 SKIP_BLANKS;
5260 GROW;
5261 }
5262 if ((cur != NULL) && (last != NULL)) {
5263 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005264 if (last != NULL)
5265 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005267 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005268 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5269"Element content declaration doesn't start and stop in the same entity\n",
5270 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005271 }
Owen Taylor3473f882001-02-23 17:55:21 +00005272 NEXT;
5273 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005274 if (ret != NULL) {
5275 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5276 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5277 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5278 else
5279 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5280 }
Owen Taylor3473f882001-02-23 17:55:21 +00005281 NEXT;
5282 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005283 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005284 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005285 cur = ret;
5286 /*
5287 * Some normalization:
5288 * (a | b* | c?)* == (a | b | c)*
5289 */
5290 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5291 if ((cur->c1 != NULL) &&
5292 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5293 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5294 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5295 if ((cur->c2 != NULL) &&
5296 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5297 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5298 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5299 cur = cur->c2;
5300 }
5301 }
Owen Taylor3473f882001-02-23 17:55:21 +00005302 NEXT;
5303 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005304 if (ret != NULL) {
5305 int found = 0;
5306
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005307 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5308 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5309 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005310 else
5311 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005312 /*
5313 * Some normalization:
5314 * (a | b*)+ == (a | b)*
5315 * (a | b?)+ == (a | b)*
5316 */
5317 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5318 if ((cur->c1 != NULL) &&
5319 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5320 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5321 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5322 found = 1;
5323 }
5324 if ((cur->c2 != NULL) &&
5325 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5326 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5327 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5328 found = 1;
5329 }
5330 cur = cur->c2;
5331 }
5332 if (found)
5333 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5334 }
Owen Taylor3473f882001-02-23 17:55:21 +00005335 NEXT;
5336 }
5337 return(ret);
5338}
5339
5340/**
5341 * xmlParseElementContentDecl:
5342 * @ctxt: an XML parser context
5343 * @name: the name of the element being defined.
5344 * @result: the Element Content pointer will be stored here if any
5345 *
5346 * parse the declaration for an Element content either Mixed or Children,
5347 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5348 *
5349 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5350 *
5351 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5352 */
5353
5354int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005355xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005356 xmlElementContentPtr *result) {
5357
5358 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005359 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005360 int res;
5361
5362 *result = NULL;
5363
5364 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005365 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005366 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005367 return(-1);
5368 }
5369 NEXT;
5370 GROW;
5371 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005372 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005373 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005374 res = XML_ELEMENT_TYPE_MIXED;
5375 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005376 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005377 res = XML_ELEMENT_TYPE_ELEMENT;
5378 }
Owen Taylor3473f882001-02-23 17:55:21 +00005379 SKIP_BLANKS;
5380 *result = tree;
5381 return(res);
5382}
5383
5384/**
5385 * xmlParseElementDecl:
5386 * @ctxt: an XML parser context
5387 *
5388 * parse an Element declaration.
5389 *
5390 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5391 *
5392 * [ VC: Unique Element Type Declaration ]
5393 * No element type may be declared more than once
5394 *
5395 * Returns the type of the element, or -1 in case of error
5396 */
5397int
5398xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005399 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 int ret = -1;
5401 xmlElementContentPtr content = NULL;
5402
Daniel Veillard4c778d82005-01-23 17:37:44 +00005403 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005404 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005405 xmlParserInputPtr input = ctxt->input;
5406
5407 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005408 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5410 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005411 }
5412 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005413 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005414 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005415 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5416 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005417 return(-1);
5418 }
5419 while ((RAW == 0) && (ctxt->inputNr > 1))
5420 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005421 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005424 }
5425 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005426 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005427 SKIP(5);
5428 /*
5429 * Element must always be empty.
5430 */
5431 ret = XML_ELEMENT_TYPE_EMPTY;
5432 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5433 (NXT(2) == 'Y')) {
5434 SKIP(3);
5435 /*
5436 * Element is a generic container.
5437 */
5438 ret = XML_ELEMENT_TYPE_ANY;
5439 } else if (RAW == '(') {
5440 ret = xmlParseElementContentDecl(ctxt, name, &content);
5441 } else {
5442 /*
5443 * [ WFC: PEs in Internal Subset ] error handling.
5444 */
5445 if ((RAW == '%') && (ctxt->external == 0) &&
5446 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005447 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005449 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005450 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005451 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5452 }
Owen Taylor3473f882001-02-23 17:55:21 +00005453 return(-1);
5454 }
5455
5456 SKIP_BLANKS;
5457 /*
5458 * Pop-up of finished entities.
5459 */
5460 while ((RAW == 0) && (ctxt->inputNr > 1))
5461 xmlPopInput(ctxt);
5462 SKIP_BLANKS;
5463
5464 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005465 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005466 if (content != NULL) {
5467 xmlFreeDocElementContent(ctxt->myDoc, content);
5468 }
Owen Taylor3473f882001-02-23 17:55:21 +00005469 } else {
5470 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005471 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5472 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005473 }
5474
5475 NEXT;
5476 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005477 (ctxt->sax->elementDecl != NULL)) {
5478 if (content != NULL)
5479 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005480 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5481 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005482 if ((content != NULL) && (content->parent == NULL)) {
5483 /*
5484 * this is a trick: if xmlAddElementDecl is called,
5485 * instead of copying the full tree it is plugged directly
5486 * if called from the parser. Avoid duplicating the
5487 * interfaces or change the API/ABI
5488 */
5489 xmlFreeDocElementContent(ctxt->myDoc, content);
5490 }
5491 } else if (content != NULL) {
5492 xmlFreeDocElementContent(ctxt->myDoc, content);
5493 }
Owen Taylor3473f882001-02-23 17:55:21 +00005494 }
Owen Taylor3473f882001-02-23 17:55:21 +00005495 }
5496 return(ret);
5497}
5498
5499/**
Owen Taylor3473f882001-02-23 17:55:21 +00005500 * xmlParseConditionalSections
5501 * @ctxt: an XML parser context
5502 *
5503 * [61] conditionalSect ::= includeSect | ignoreSect
5504 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5505 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5506 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5507 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5508 */
5509
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005510static void
Owen Taylor3473f882001-02-23 17:55:21 +00005511xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5512 SKIP(3);
5513 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005514 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005515 SKIP(7);
5516 SKIP_BLANKS;
5517 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005518 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005519 } else {
5520 NEXT;
5521 }
5522 if (xmlParserDebugEntities) {
5523 if ((ctxt->input != NULL) && (ctxt->input->filename))
5524 xmlGenericError(xmlGenericErrorContext,
5525 "%s(%d): ", ctxt->input->filename,
5526 ctxt->input->line);
5527 xmlGenericError(xmlGenericErrorContext,
5528 "Entering INCLUDE Conditional Section\n");
5529 }
5530
5531 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5532 (NXT(2) != '>'))) {
5533 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005534 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005535
5536 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5537 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005538 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005539 NEXT;
5540 } else if (RAW == '%') {
5541 xmlParsePEReference(ctxt);
5542 } else
5543 xmlParseMarkupDecl(ctxt);
5544
5545 /*
5546 * Pop-up of finished entities.
5547 */
5548 while ((RAW == 0) && (ctxt->inputNr > 1))
5549 xmlPopInput(ctxt);
5550
Daniel Veillardfdc91562002-07-01 21:52:03 +00005551 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005552 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005553 break;
5554 }
5555 }
5556 if (xmlParserDebugEntities) {
5557 if ((ctxt->input != NULL) && (ctxt->input->filename))
5558 xmlGenericError(xmlGenericErrorContext,
5559 "%s(%d): ", ctxt->input->filename,
5560 ctxt->input->line);
5561 xmlGenericError(xmlGenericErrorContext,
5562 "Leaving INCLUDE Conditional Section\n");
5563 }
5564
Daniel Veillarda07050d2003-10-19 14:46:32 +00005565 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005566 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005567 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005568 int depth = 0;
5569
5570 SKIP(6);
5571 SKIP_BLANKS;
5572 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005573 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005574 } else {
5575 NEXT;
5576 }
5577 if (xmlParserDebugEntities) {
5578 if ((ctxt->input != NULL) && (ctxt->input->filename))
5579 xmlGenericError(xmlGenericErrorContext,
5580 "%s(%d): ", ctxt->input->filename,
5581 ctxt->input->line);
5582 xmlGenericError(xmlGenericErrorContext,
5583 "Entering IGNORE Conditional Section\n");
5584 }
5585
5586 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005587 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005588 * But disable SAX event generating DTD building in the meantime
5589 */
5590 state = ctxt->disableSAX;
5591 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005592 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005593 ctxt->instate = XML_PARSER_IGNORE;
5594
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005595 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005596 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5597 depth++;
5598 SKIP(3);
5599 continue;
5600 }
5601 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5602 if (--depth >= 0) SKIP(3);
5603 continue;
5604 }
5605 NEXT;
5606 continue;
5607 }
5608
5609 ctxt->disableSAX = state;
5610 ctxt->instate = instate;
5611
5612 if (xmlParserDebugEntities) {
5613 if ((ctxt->input != NULL) && (ctxt->input->filename))
5614 xmlGenericError(xmlGenericErrorContext,
5615 "%s(%d): ", ctxt->input->filename,
5616 ctxt->input->line);
5617 xmlGenericError(xmlGenericErrorContext,
5618 "Leaving IGNORE Conditional Section\n");
5619 }
5620
5621 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005622 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005623 }
5624
5625 if (RAW == 0)
5626 SHRINK;
5627
5628 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005629 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005630 } else {
5631 SKIP(3);
5632 }
5633}
5634
5635/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005636 * xmlParseMarkupDecl:
5637 * @ctxt: an XML parser context
5638 *
5639 * parse Markup declarations
5640 *
5641 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5642 * NotationDecl | PI | Comment
5643 *
5644 * [ VC: Proper Declaration/PE Nesting ]
5645 * Parameter-entity replacement text must be properly nested with
5646 * markup declarations. That is to say, if either the first character
5647 * or the last character of a markup declaration (markupdecl above) is
5648 * contained in the replacement text for a parameter-entity reference,
5649 * both must be contained in the same replacement text.
5650 *
5651 * [ WFC: PEs in Internal Subset ]
5652 * In the internal DTD subset, parameter-entity references can occur
5653 * only where markup declarations can occur, not within markup declarations.
5654 * (This does not apply to references that occur in external parameter
5655 * entities or to the external subset.)
5656 */
5657void
5658xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5659 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005660 if (CUR == '<') {
5661 if (NXT(1) == '!') {
5662 switch (NXT(2)) {
5663 case 'E':
5664 if (NXT(3) == 'L')
5665 xmlParseElementDecl(ctxt);
5666 else if (NXT(3) == 'N')
5667 xmlParseEntityDecl(ctxt);
5668 break;
5669 case 'A':
5670 xmlParseAttributeListDecl(ctxt);
5671 break;
5672 case 'N':
5673 xmlParseNotationDecl(ctxt);
5674 break;
5675 case '-':
5676 xmlParseComment(ctxt);
5677 break;
5678 default:
5679 /* there is an error but it will be detected later */
5680 break;
5681 }
5682 } else if (NXT(1) == '?') {
5683 xmlParsePI(ctxt);
5684 }
5685 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005686 /*
5687 * This is only for internal subset. On external entities,
5688 * the replacement is done before parsing stage
5689 */
5690 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5691 xmlParsePEReference(ctxt);
5692
5693 /*
5694 * Conditional sections are allowed from entities included
5695 * by PE References in the internal subset.
5696 */
5697 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5698 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5699 xmlParseConditionalSections(ctxt);
5700 }
5701 }
5702
5703 ctxt->instate = XML_PARSER_DTD;
5704}
5705
5706/**
5707 * xmlParseTextDecl:
5708 * @ctxt: an XML parser context
5709 *
5710 * parse an XML declaration header for external entities
5711 *
5712 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5713 *
5714 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5715 */
5716
5717void
5718xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5719 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005720 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005721
5722 /*
5723 * We know that '<?xml' is here.
5724 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005725 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005726 SKIP(5);
5727 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005728 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005729 return;
5730 }
5731
William M. Brack76e95df2003-10-18 16:20:14 +00005732 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005733 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5734 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005735 }
5736 SKIP_BLANKS;
5737
5738 /*
5739 * We may have the VersionInfo here.
5740 */
5741 version = xmlParseVersionInfo(ctxt);
5742 if (version == NULL)
5743 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005744 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005745 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005746 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5747 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005748 }
5749 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005750 ctxt->input->version = version;
5751
5752 /*
5753 * We must have the encoding declaration
5754 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005755 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005756 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5757 /*
5758 * The XML REC instructs us to stop parsing right here
5759 */
5760 return;
5761 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005762 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5763 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5764 "Missing encoding in text declaration\n");
5765 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005766
5767 SKIP_BLANKS;
5768 if ((RAW == '?') && (NXT(1) == '>')) {
5769 SKIP(2);
5770 } else if (RAW == '>') {
5771 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005772 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005773 NEXT;
5774 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005775 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005776 MOVETO_ENDTAG(CUR_PTR);
5777 NEXT;
5778 }
5779}
5780
5781/**
Owen Taylor3473f882001-02-23 17:55:21 +00005782 * xmlParseExternalSubset:
5783 * @ctxt: an XML parser context
5784 * @ExternalID: the external identifier
5785 * @SystemID: the system identifier (or URL)
5786 *
5787 * parse Markup declarations from an external subset
5788 *
5789 * [30] extSubset ::= textDecl? extSubsetDecl
5790 *
5791 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5792 */
5793void
5794xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5795 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005796 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005797 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005798 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005799 xmlParseTextDecl(ctxt);
5800 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5801 /*
5802 * The XML REC instructs us to stop parsing right here
5803 */
5804 ctxt->instate = XML_PARSER_EOF;
5805 return;
5806 }
5807 }
5808 if (ctxt->myDoc == NULL) {
5809 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5810 }
5811 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5812 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5813
5814 ctxt->instate = XML_PARSER_DTD;
5815 ctxt->external = 1;
5816 while (((RAW == '<') && (NXT(1) == '?')) ||
5817 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005818 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005819 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005820 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005821
5822 GROW;
5823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5824 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005825 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005826 NEXT;
5827 } else if (RAW == '%') {
5828 xmlParsePEReference(ctxt);
5829 } else
5830 xmlParseMarkupDecl(ctxt);
5831
5832 /*
5833 * Pop-up of finished entities.
5834 */
5835 while ((RAW == 0) && (ctxt->inputNr > 1))
5836 xmlPopInput(ctxt);
5837
Daniel Veillardfdc91562002-07-01 21:52:03 +00005838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005839 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005840 break;
5841 }
5842 }
5843
5844 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005845 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 }
5847
5848}
5849
5850/**
5851 * xmlParseReference:
5852 * @ctxt: an XML parser context
5853 *
5854 * parse and handle entity references in content, depending on the SAX
5855 * interface, this may end-up in a call to character() if this is a
5856 * CharRef, a predefined entity, if there is no reference() callback.
5857 * or if the parser was asked to switch to that mode.
5858 *
5859 * [67] Reference ::= EntityRef | CharRef
5860 */
5861void
5862xmlParseReference(xmlParserCtxtPtr ctxt) {
5863 xmlEntityPtr ent;
5864 xmlChar *val;
5865 if (RAW != '&') return;
5866
5867 if (NXT(1) == '#') {
5868 int i = 0;
5869 xmlChar out[10];
5870 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005871 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005872
5873 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5874 /*
5875 * So we are using non-UTF-8 buffers
5876 * Check that the char fit on 8bits, if not
5877 * generate a CharRef.
5878 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005879 if (value <= 0xFF) {
5880 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005881 out[1] = 0;
5882 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5883 (!ctxt->disableSAX))
5884 ctxt->sax->characters(ctxt->userData, out, 1);
5885 } else {
5886 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005887 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005888 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005889 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5891 (!ctxt->disableSAX))
5892 ctxt->sax->reference(ctxt->userData, out);
5893 }
5894 } else {
5895 /*
5896 * Just encode the value in UTF-8
5897 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005898 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 out[i] = 0;
5900 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5901 (!ctxt->disableSAX))
5902 ctxt->sax->characters(ctxt->userData, out, i);
5903 }
5904 } else {
5905 ent = xmlParseEntityRef(ctxt);
5906 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005907 if (!ctxt->wellFormed)
5908 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005909 if ((ent->name != NULL) &&
5910 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5911 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005912 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005913
5914
5915 /*
5916 * The first reference to the entity trigger a parsing phase
5917 * where the ent->children is filled with the result from
5918 * the parsing.
5919 */
5920 if (ent->children == NULL) {
5921 xmlChar *value;
5922 value = ent->content;
5923
5924 /*
5925 * Check that this entity is well formed
5926 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005927 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005928 (value[1] == 0) && (value[0] == '<') &&
5929 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5930 /*
5931 * DONE: get definite answer on this !!!
5932 * Lots of entity decls are used to declare a single
5933 * char
5934 * <!ENTITY lt "<">
5935 * Which seems to be valid since
5936 * 2.4: The ampersand character (&) and the left angle
5937 * bracket (<) may appear in their literal form only
5938 * when used ... They are also legal within the literal
5939 * entity value of an internal entity declaration;i
5940 * see "4.3.2 Well-Formed Parsed Entities".
5941 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5942 * Looking at the OASIS test suite and James Clark
5943 * tests, this is broken. However the XML REC uses
5944 * it. Is the XML REC not well-formed ????
5945 * This is a hack to avoid this problem
5946 *
5947 * ANSWER: since lt gt amp .. are already defined,
5948 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005949 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005950 * is lousy but acceptable.
5951 */
5952 list = xmlNewDocText(ctxt->myDoc, value);
5953 if (list != NULL) {
5954 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5955 (ent->children == NULL)) {
5956 ent->children = list;
5957 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005958 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005959 list->parent = (xmlNodePtr) ent;
5960 } else {
5961 xmlFreeNodeList(list);
5962 }
5963 } else if (list != NULL) {
5964 xmlFreeNodeList(list);
5965 }
5966 } else {
5967 /*
5968 * 4.3.2: An internal general parsed entity is well-formed
5969 * if its replacement text matches the production labeled
5970 * content.
5971 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005972
5973 void *user_data;
5974 /*
5975 * This is a bit hackish but this seems the best
5976 * way to make sure both SAX and DOM entity support
5977 * behaves okay.
5978 */
5979 if (ctxt->userData == ctxt)
5980 user_data = NULL;
5981 else
5982 user_data = ctxt->userData;
5983
Owen Taylor3473f882001-02-23 17:55:21 +00005984 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5985 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005986 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5987 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005988 ctxt->depth--;
5989 } else if (ent->etype ==
5990 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5991 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005992 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005993 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005994 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005995 ctxt->depth--;
5996 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005997 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005998 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5999 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006000 }
6001 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006002 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006003 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006004 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006005 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6006 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006007 (ent->children == NULL)) {
6008 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006009 if (ctxt->replaceEntities) {
6010 /*
6011 * Prune it directly in the generated document
6012 * except for single text nodes.
6013 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006014 if (((list->type == XML_TEXT_NODE) &&
6015 (list->next == NULL)) ||
6016 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006017 list->parent = (xmlNodePtr) ent;
6018 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006019 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006020 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006021 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006022 while (list != NULL) {
6023 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006024 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006025 if (list->next == NULL)
6026 ent->last = list;
6027 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006028 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006029 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006030#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006031 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6032 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006033#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006034 }
6035 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006036 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006037 while (list != NULL) {
6038 list->parent = (xmlNodePtr) ent;
6039 if (list->next == NULL)
6040 ent->last = list;
6041 list = list->next;
6042 }
Owen Taylor3473f882001-02-23 17:55:21 +00006043 }
6044 } else {
6045 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006046 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006047 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006048 } else if ((ret != XML_ERR_OK) &&
6049 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006050 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006051 } else if (list != NULL) {
6052 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006053 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006054 }
6055 }
6056 }
6057 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6058 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6059 /*
6060 * Create a node.
6061 */
6062 ctxt->sax->reference(ctxt->userData, ent->name);
6063 return;
6064 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006065 /*
6066 * There is a problem on the handling of _private for entities
6067 * (bug 155816): Should we copy the content of the field from
6068 * the entity (possibly overwriting some value set by the user
6069 * when a copy is created), should we leave it alone, or should
6070 * we try to take care of different situations? The problem
6071 * is exacerbated by the usage of this field by the xmlReader.
6072 * To fix this bug, we look at _private on the created node
6073 * and, if it's NULL, we copy in whatever was in the entity.
6074 * If it's not NULL we leave it alone. This is somewhat of a
6075 * hack - maybe we should have further tests to determine
6076 * what to do.
6077 */
Owen Taylor3473f882001-02-23 17:55:21 +00006078 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6079 /*
6080 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006081 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006082 * In the first occurrence list contains the replacement.
6083 * progressive == 2 means we are operating on the Reader
6084 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006085 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006086 if (((list == NULL) && (ent->owner == 0)) ||
6087 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006088 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006089
6090 /*
6091 * when operating on a reader, the entities definitions
6092 * are always owning the entities subtree.
6093 if (ctxt->parseMode == XML_PARSE_READER)
6094 ent->owner = 1;
6095 */
6096
Daniel Veillard62f313b2001-07-04 19:49:14 +00006097 cur = ent->children;
6098 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006099 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006100 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006101 if (nw->_private == NULL)
6102 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006103 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006104 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006105 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006106 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006107 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006108 if (cur == ent->last) {
6109 /*
6110 * needed to detect some strange empty
6111 * node cases in the reader tests
6112 */
6113 if ((ctxt->parseMode == XML_PARSE_READER) &&
6114 (nw->type == XML_ELEMENT_NODE) &&
6115 (nw->children == NULL))
6116 nw->extra = 1;
6117
Daniel Veillard62f313b2001-07-04 19:49:14 +00006118 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006119 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006120 cur = cur->next;
6121 }
Daniel Veillard81273902003-09-30 00:43:48 +00006122#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006123 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006124 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006125#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006126 } else if (list == NULL) {
6127 xmlNodePtr nw = NULL, cur, next, last,
6128 firstChild = NULL;
6129 /*
6130 * Copy the entity child list and make it the new
6131 * entity child list. The goal is to make sure any
6132 * ID or REF referenced will be the one from the
6133 * document content and not the entity copy.
6134 */
6135 cur = ent->children;
6136 ent->children = NULL;
6137 last = ent->last;
6138 ent->last = NULL;
6139 while (cur != NULL) {
6140 next = cur->next;
6141 cur->next = NULL;
6142 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006143 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006144 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006145 if (nw->_private == NULL)
6146 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006147 if (firstChild == NULL){
6148 firstChild = cur;
6149 }
6150 xmlAddChild((xmlNodePtr) ent, nw);
6151 xmlAddChild(ctxt->node, cur);
6152 }
6153 if (cur == last)
6154 break;
6155 cur = next;
6156 }
6157 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006158#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006159 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6160 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006161#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006162 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006163 const xmlChar *nbktext;
6164
Daniel Veillard62f313b2001-07-04 19:49:14 +00006165 /*
6166 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006167 * node with a possible previous text one which
6168 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006169 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006170 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6171 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006172 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006173 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006174 if ((ent->last != ent->children) &&
6175 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006176 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006177 xmlAddChildList(ctxt->node, ent->children);
6178 }
6179
Owen Taylor3473f882001-02-23 17:55:21 +00006180 /*
6181 * This is to avoid a nasty side effect, see
6182 * characters() in SAX.c
6183 */
6184 ctxt->nodemem = 0;
6185 ctxt->nodelen = 0;
6186 return;
6187 } else {
6188 /*
6189 * Probably running in SAX mode
6190 */
6191 xmlParserInputPtr input;
6192
6193 input = xmlNewEntityInputStream(ctxt, ent);
6194 xmlPushInput(ctxt, input);
6195 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006196 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6197 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006198 xmlParseTextDecl(ctxt);
6199 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6200 /*
6201 * The XML REC instructs us to stop parsing right here
6202 */
6203 ctxt->instate = XML_PARSER_EOF;
6204 return;
6205 }
6206 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006207 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6208 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006209 }
6210 }
6211 return;
6212 }
6213 }
6214 } else {
6215 val = ent->content;
6216 if (val == NULL) return;
6217 /*
6218 * inline the entity.
6219 */
6220 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6221 (!ctxt->disableSAX))
6222 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6223 }
6224 }
6225}
6226
6227/**
6228 * xmlParseEntityRef:
6229 * @ctxt: an XML parser context
6230 *
6231 * parse ENTITY references declarations
6232 *
6233 * [68] EntityRef ::= '&' Name ';'
6234 *
6235 * [ WFC: Entity Declared ]
6236 * In a document without any DTD, a document with only an internal DTD
6237 * subset which contains no parameter entity references, or a document
6238 * with "standalone='yes'", the Name given in the entity reference
6239 * must match that in an entity declaration, except that well-formed
6240 * documents need not declare any of the following entities: amp, lt,
6241 * gt, apos, quot. The declaration of a parameter entity must precede
6242 * any reference to it. Similarly, the declaration of a general entity
6243 * must precede any reference to it which appears in a default value in an
6244 * attribute-list declaration. Note that if entities are declared in the
6245 * external subset or in external parameter entities, a non-validating
6246 * processor is not obligated to read and process their declarations;
6247 * for such documents, the rule that an entity must be declared is a
6248 * well-formedness constraint only if standalone='yes'.
6249 *
6250 * [ WFC: Parsed Entity ]
6251 * An entity reference must not contain the name of an unparsed entity
6252 *
6253 * Returns the xmlEntityPtr if found, or NULL otherwise.
6254 */
6255xmlEntityPtr
6256xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006257 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006258 xmlEntityPtr ent = NULL;
6259
6260 GROW;
6261
6262 if (RAW == '&') {
6263 NEXT;
6264 name = xmlParseName(ctxt);
6265 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6267 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006268 } else {
6269 if (RAW == ';') {
6270 NEXT;
6271 /*
6272 * Ask first SAX for entity resolution, otherwise try the
6273 * predefined set.
6274 */
6275 if (ctxt->sax != NULL) {
6276 if (ctxt->sax->getEntity != NULL)
6277 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006278 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006279 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006280 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6281 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006282 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006283 }
Owen Taylor3473f882001-02-23 17:55:21 +00006284 }
6285 /*
6286 * [ WFC: Entity Declared ]
6287 * In a document without any DTD, a document with only an
6288 * internal DTD subset which contains no parameter entity
6289 * references, or a document with "standalone='yes'", the
6290 * Name given in the entity reference must match that in an
6291 * entity declaration, except that well-formed documents
6292 * need not declare any of the following entities: amp, lt,
6293 * gt, apos, quot.
6294 * The declaration of a parameter entity must precede any
6295 * reference to it.
6296 * Similarly, the declaration of a general entity must
6297 * precede any reference to it which appears in a default
6298 * value in an attribute-list declaration. Note that if
6299 * entities are declared in the external subset or in
6300 * external parameter entities, a non-validating processor
6301 * is not obligated to read and process their declarations;
6302 * for such documents, the rule that an entity must be
6303 * declared is a well-formedness constraint only if
6304 * standalone='yes'.
6305 */
6306 if (ent == NULL) {
6307 if ((ctxt->standalone == 1) ||
6308 ((ctxt->hasExternalSubset == 0) &&
6309 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006310 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006311 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006312 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006313 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006314 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006315 if ((ctxt->inSubset == 0) &&
6316 (ctxt->sax != NULL) &&
6317 (ctxt->sax->reference != NULL)) {
6318 ctxt->sax->reference(ctxt, name);
6319 }
Owen Taylor3473f882001-02-23 17:55:21 +00006320 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006321 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006322 }
6323
6324 /*
6325 * [ WFC: Parsed Entity ]
6326 * An entity reference must not contain the name of an
6327 * unparsed entity
6328 */
6329 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006330 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006331 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006332 }
6333
6334 /*
6335 * [ WFC: No External Entity References ]
6336 * Attribute values cannot contain direct or indirect
6337 * entity references to external entities.
6338 */
6339 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6340 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006341 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6342 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006343 }
6344 /*
6345 * [ WFC: No < in Attribute Values ]
6346 * The replacement text of any entity referred to directly or
6347 * indirectly in an attribute value (other than "&lt;") must
6348 * not contain a <.
6349 */
6350 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6351 (ent != NULL) &&
6352 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6353 (ent->content != NULL) &&
6354 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006355 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006356 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006357 }
6358
6359 /*
6360 * Internal check, no parameter entities here ...
6361 */
6362 else {
6363 switch (ent->etype) {
6364 case XML_INTERNAL_PARAMETER_ENTITY:
6365 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006366 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6367 "Attempt to reference the parameter entity '%s'\n",
6368 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006369 break;
6370 default:
6371 break;
6372 }
6373 }
6374
6375 /*
6376 * [ WFC: No Recursion ]
6377 * A parsed entity must not contain a recursive reference
6378 * to itself, either directly or indirectly.
6379 * Done somewhere else
6380 */
6381
6382 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006383 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006384 }
Owen Taylor3473f882001-02-23 17:55:21 +00006385 }
6386 }
6387 return(ent);
6388}
6389
6390/**
6391 * xmlParseStringEntityRef:
6392 * @ctxt: an XML parser context
6393 * @str: a pointer to an index in the string
6394 *
6395 * parse ENTITY references declarations, but this version parses it from
6396 * a string value.
6397 *
6398 * [68] EntityRef ::= '&' Name ';'
6399 *
6400 * [ WFC: Entity Declared ]
6401 * In a document without any DTD, a document with only an internal DTD
6402 * subset which contains no parameter entity references, or a document
6403 * with "standalone='yes'", the Name given in the entity reference
6404 * must match that in an entity declaration, except that well-formed
6405 * documents need not declare any of the following entities: amp, lt,
6406 * gt, apos, quot. The declaration of a parameter entity must precede
6407 * any reference to it. Similarly, the declaration of a general entity
6408 * must precede any reference to it which appears in a default value in an
6409 * attribute-list declaration. Note that if entities are declared in the
6410 * external subset or in external parameter entities, a non-validating
6411 * processor is not obligated to read and process their declarations;
6412 * for such documents, the rule that an entity must be declared is a
6413 * well-formedness constraint only if standalone='yes'.
6414 *
6415 * [ WFC: Parsed Entity ]
6416 * An entity reference must not contain the name of an unparsed entity
6417 *
6418 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6419 * is updated to the current location in the string.
6420 */
6421xmlEntityPtr
6422xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6423 xmlChar *name;
6424 const xmlChar *ptr;
6425 xmlChar cur;
6426 xmlEntityPtr ent = NULL;
6427
6428 if ((str == NULL) || (*str == NULL))
6429 return(NULL);
6430 ptr = *str;
6431 cur = *ptr;
6432 if (cur == '&') {
6433 ptr++;
6434 cur = *ptr;
6435 name = xmlParseStringName(ctxt, &ptr);
6436 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006437 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6438 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006439 } else {
6440 if (*ptr == ';') {
6441 ptr++;
6442 /*
6443 * Ask first SAX for entity resolution, otherwise try the
6444 * predefined set.
6445 */
6446 if (ctxt->sax != NULL) {
6447 if (ctxt->sax->getEntity != NULL)
6448 ent = ctxt->sax->getEntity(ctxt->userData, name);
6449 if (ent == NULL)
6450 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006451 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006452 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006453 }
Owen Taylor3473f882001-02-23 17:55:21 +00006454 }
6455 /*
6456 * [ WFC: Entity Declared ]
6457 * In a document without any DTD, a document with only an
6458 * internal DTD subset which contains no parameter entity
6459 * references, or a document with "standalone='yes'", the
6460 * Name given in the entity reference must match that in an
6461 * entity declaration, except that well-formed documents
6462 * need not declare any of the following entities: amp, lt,
6463 * gt, apos, quot.
6464 * The declaration of a parameter entity must precede any
6465 * reference to it.
6466 * Similarly, the declaration of a general entity must
6467 * precede any reference to it which appears in a default
6468 * value in an attribute-list declaration. Note that if
6469 * entities are declared in the external subset or in
6470 * external parameter entities, a non-validating processor
6471 * is not obligated to read and process their declarations;
6472 * for such documents, the rule that an entity must be
6473 * declared is a well-formedness constraint only if
6474 * standalone='yes'.
6475 */
6476 if (ent == NULL) {
6477 if ((ctxt->standalone == 1) ||
6478 ((ctxt->hasExternalSubset == 0) &&
6479 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006480 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006481 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006482 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006483 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006484 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006485 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006486 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006487 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006488 }
6489
6490 /*
6491 * [ WFC: Parsed Entity ]
6492 * An entity reference must not contain the name of an
6493 * unparsed entity
6494 */
6495 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006496 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006497 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006498 }
6499
6500 /*
6501 * [ WFC: No External Entity References ]
6502 * Attribute values cannot contain direct or indirect
6503 * entity references to external entities.
6504 */
6505 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6506 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006507 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006508 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006509 }
6510 /*
6511 * [ WFC: No < in Attribute Values ]
6512 * The replacement text of any entity referred to directly or
6513 * indirectly in an attribute value (other than "&lt;") must
6514 * not contain a <.
6515 */
6516 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6517 (ent != NULL) &&
6518 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6519 (ent->content != NULL) &&
6520 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006521 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6522 "'<' in entity '%s' is not allowed in attributes values\n",
6523 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006524 }
6525
6526 /*
6527 * Internal check, no parameter entities here ...
6528 */
6529 else {
6530 switch (ent->etype) {
6531 case XML_INTERNAL_PARAMETER_ENTITY:
6532 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006533 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6534 "Attempt to reference the parameter entity '%s'\n",
6535 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006536 break;
6537 default:
6538 break;
6539 }
6540 }
6541
6542 /*
6543 * [ WFC: No Recursion ]
6544 * A parsed entity must not contain a recursive reference
6545 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006546 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006547 */
6548
6549 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006550 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006551 }
6552 xmlFree(name);
6553 }
6554 }
6555 *str = ptr;
6556 return(ent);
6557}
6558
6559/**
6560 * xmlParsePEReference:
6561 * @ctxt: an XML parser context
6562 *
6563 * parse PEReference declarations
6564 * The entity content is handled directly by pushing it's content as
6565 * a new input stream.
6566 *
6567 * [69] PEReference ::= '%' Name ';'
6568 *
6569 * [ WFC: No Recursion ]
6570 * A parsed entity must not contain a recursive
6571 * reference to itself, either directly or indirectly.
6572 *
6573 * [ WFC: Entity Declared ]
6574 * In a document without any DTD, a document with only an internal DTD
6575 * subset which contains no parameter entity references, or a document
6576 * with "standalone='yes'", ... ... The declaration of a parameter
6577 * entity must precede any reference to it...
6578 *
6579 * [ VC: Entity Declared ]
6580 * In a document with an external subset or external parameter entities
6581 * with "standalone='no'", ... ... The declaration of a parameter entity
6582 * must precede any reference to it...
6583 *
6584 * [ WFC: In DTD ]
6585 * Parameter-entity references may only appear in the DTD.
6586 * NOTE: misleading but this is handled.
6587 */
6588void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006589xmlParsePEReference(xmlParserCtxtPtr ctxt)
6590{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006591 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006592 xmlEntityPtr entity = NULL;
6593 xmlParserInputPtr input;
6594
6595 if (RAW == '%') {
6596 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006597 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006598 if (name == NULL) {
6599 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6600 "xmlParsePEReference: no name\n");
6601 } else {
6602 if (RAW == ';') {
6603 NEXT;
6604 if ((ctxt->sax != NULL) &&
6605 (ctxt->sax->getParameterEntity != NULL))
6606 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6607 name);
6608 if (entity == NULL) {
6609 /*
6610 * [ WFC: Entity Declared ]
6611 * In a document without any DTD, a document with only an
6612 * internal DTD subset which contains no parameter entity
6613 * references, or a document with "standalone='yes'", ...
6614 * ... The declaration of a parameter entity must precede
6615 * any reference to it...
6616 */
6617 if ((ctxt->standalone == 1) ||
6618 ((ctxt->hasExternalSubset == 0) &&
6619 (ctxt->hasPErefs == 0))) {
6620 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6621 "PEReference: %%%s; not found\n",
6622 name);
6623 } else {
6624 /*
6625 * [ VC: Entity Declared ]
6626 * In a document with an external subset or external
6627 * parameter entities with "standalone='no'", ...
6628 * ... The declaration of a parameter entity must
6629 * precede any reference to it...
6630 */
6631 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6632 "PEReference: %%%s; not found\n",
6633 name, NULL);
6634 ctxt->valid = 0;
6635 }
6636 } else {
6637 /*
6638 * Internal checking in case the entity quest barfed
6639 */
6640 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6641 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6642 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6643 "Internal: %%%s; is not a parameter entity\n",
6644 name, NULL);
6645 } else if (ctxt->input->free != deallocblankswrapper) {
6646 input =
6647 xmlNewBlanksWrapperInputStream(ctxt, entity);
6648 xmlPushInput(ctxt, input);
6649 } else {
6650 /*
6651 * TODO !!!
6652 * handle the extra spaces added before and after
6653 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6654 */
6655 input = xmlNewEntityInputStream(ctxt, entity);
6656 xmlPushInput(ctxt, input);
6657 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006658 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006659 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006660 xmlParseTextDecl(ctxt);
6661 if (ctxt->errNo ==
6662 XML_ERR_UNSUPPORTED_ENCODING) {
6663 /*
6664 * The XML REC instructs us to stop parsing
6665 * right here
6666 */
6667 ctxt->instate = XML_PARSER_EOF;
6668 return;
6669 }
6670 }
6671 }
6672 }
6673 ctxt->hasPErefs = 1;
6674 } else {
6675 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6676 }
6677 }
Owen Taylor3473f882001-02-23 17:55:21 +00006678 }
6679}
6680
6681/**
6682 * xmlParseStringPEReference:
6683 * @ctxt: an XML parser context
6684 * @str: a pointer to an index in the string
6685 *
6686 * parse PEReference declarations
6687 *
6688 * [69] PEReference ::= '%' Name ';'
6689 *
6690 * [ WFC: No Recursion ]
6691 * A parsed entity must not contain a recursive
6692 * reference to itself, either directly or indirectly.
6693 *
6694 * [ WFC: Entity Declared ]
6695 * In a document without any DTD, a document with only an internal DTD
6696 * subset which contains no parameter entity references, or a document
6697 * with "standalone='yes'", ... ... The declaration of a parameter
6698 * entity must precede any reference to it...
6699 *
6700 * [ VC: Entity Declared ]
6701 * In a document with an external subset or external parameter entities
6702 * with "standalone='no'", ... ... The declaration of a parameter entity
6703 * must precede any reference to it...
6704 *
6705 * [ WFC: In DTD ]
6706 * Parameter-entity references may only appear in the DTD.
6707 * NOTE: misleading but this is handled.
6708 *
6709 * Returns the string of the entity content.
6710 * str is updated to the current value of the index
6711 */
6712xmlEntityPtr
6713xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6714 const xmlChar *ptr;
6715 xmlChar cur;
6716 xmlChar *name;
6717 xmlEntityPtr entity = NULL;
6718
6719 if ((str == NULL) || (*str == NULL)) return(NULL);
6720 ptr = *str;
6721 cur = *ptr;
6722 if (cur == '%') {
6723 ptr++;
6724 cur = *ptr;
6725 name = xmlParseStringName(ctxt, &ptr);
6726 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6728 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006729 } else {
6730 cur = *ptr;
6731 if (cur == ';') {
6732 ptr++;
6733 cur = *ptr;
6734 if ((ctxt->sax != NULL) &&
6735 (ctxt->sax->getParameterEntity != NULL))
6736 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6737 name);
6738 if (entity == NULL) {
6739 /*
6740 * [ WFC: Entity Declared ]
6741 * In a document without any DTD, a document with only an
6742 * internal DTD subset which contains no parameter entity
6743 * references, or a document with "standalone='yes'", ...
6744 * ... The declaration of a parameter entity must precede
6745 * any reference to it...
6746 */
6747 if ((ctxt->standalone == 1) ||
6748 ((ctxt->hasExternalSubset == 0) &&
6749 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006750 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006751 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006752 } else {
6753 /*
6754 * [ VC: Entity Declared ]
6755 * In a document with an external subset or external
6756 * parameter entities with "standalone='no'", ...
6757 * ... The declaration of a parameter entity must
6758 * precede any reference to it...
6759 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006760 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6761 "PEReference: %%%s; not found\n",
6762 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006763 ctxt->valid = 0;
6764 }
6765 } else {
6766 /*
6767 * Internal checking in case the entity quest barfed
6768 */
6769 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6770 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006771 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6772 "%%%s; is not a parameter entity\n",
6773 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006774 }
6775 }
6776 ctxt->hasPErefs = 1;
6777 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006778 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006779 }
6780 xmlFree(name);
6781 }
6782 }
6783 *str = ptr;
6784 return(entity);
6785}
6786
6787/**
6788 * xmlParseDocTypeDecl:
6789 * @ctxt: an XML parser context
6790 *
6791 * parse a DOCTYPE declaration
6792 *
6793 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6794 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6795 *
6796 * [ VC: Root Element Type ]
6797 * The Name in the document type declaration must match the element
6798 * type of the root element.
6799 */
6800
6801void
6802xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006803 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006804 xmlChar *ExternalID = NULL;
6805 xmlChar *URI = NULL;
6806
6807 /*
6808 * We know that '<!DOCTYPE' has been detected.
6809 */
6810 SKIP(9);
6811
6812 SKIP_BLANKS;
6813
6814 /*
6815 * Parse the DOCTYPE name.
6816 */
6817 name = xmlParseName(ctxt);
6818 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006819 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6820 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006821 }
6822 ctxt->intSubName = name;
6823
6824 SKIP_BLANKS;
6825
6826 /*
6827 * Check for SystemID and ExternalID
6828 */
6829 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6830
6831 if ((URI != NULL) || (ExternalID != NULL)) {
6832 ctxt->hasExternalSubset = 1;
6833 }
6834 ctxt->extSubURI = URI;
6835 ctxt->extSubSystem = ExternalID;
6836
6837 SKIP_BLANKS;
6838
6839 /*
6840 * Create and update the internal subset.
6841 */
6842 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6843 (!ctxt->disableSAX))
6844 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6845
6846 /*
6847 * Is there any internal subset declarations ?
6848 * they are handled separately in xmlParseInternalSubset()
6849 */
6850 if (RAW == '[')
6851 return;
6852
6853 /*
6854 * We should be at the end of the DOCTYPE declaration.
6855 */
6856 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006857 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859 NEXT;
6860}
6861
6862/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006863 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006864 * @ctxt: an XML parser context
6865 *
6866 * parse the internal subset declaration
6867 *
6868 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6869 */
6870
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006871static void
Owen Taylor3473f882001-02-23 17:55:21 +00006872xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6873 /*
6874 * Is there any DTD definition ?
6875 */
6876 if (RAW == '[') {
6877 ctxt->instate = XML_PARSER_DTD;
6878 NEXT;
6879 /*
6880 * Parse the succession of Markup declarations and
6881 * PEReferences.
6882 * Subsequence (markupdecl | PEReference | S)*
6883 */
6884 while (RAW != ']') {
6885 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006886 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006887
6888 SKIP_BLANKS;
6889 xmlParseMarkupDecl(ctxt);
6890 xmlParsePEReference(ctxt);
6891
6892 /*
6893 * Pop-up of finished entities.
6894 */
6895 while ((RAW == 0) && (ctxt->inputNr > 1))
6896 xmlPopInput(ctxt);
6897
6898 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006899 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006900 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006901 break;
6902 }
6903 }
6904 if (RAW == ']') {
6905 NEXT;
6906 SKIP_BLANKS;
6907 }
6908 }
6909
6910 /*
6911 * We should be at the end of the DOCTYPE declaration.
6912 */
6913 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006914 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006915 }
6916 NEXT;
6917}
6918
Daniel Veillard81273902003-09-30 00:43:48 +00006919#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006920/**
6921 * xmlParseAttribute:
6922 * @ctxt: an XML parser context
6923 * @value: a xmlChar ** used to store the value of the attribute
6924 *
6925 * parse an attribute
6926 *
6927 * [41] Attribute ::= Name Eq AttValue
6928 *
6929 * [ WFC: No External Entity References ]
6930 * Attribute values cannot contain direct or indirect entity references
6931 * to external entities.
6932 *
6933 * [ WFC: No < in Attribute Values ]
6934 * The replacement text of any entity referred to directly or indirectly in
6935 * an attribute value (other than "&lt;") must not contain a <.
6936 *
6937 * [ VC: Attribute Value Type ]
6938 * The attribute must have been declared; the value must be of the type
6939 * declared for it.
6940 *
6941 * [25] Eq ::= S? '=' S?
6942 *
6943 * With namespace:
6944 *
6945 * [NS 11] Attribute ::= QName Eq AttValue
6946 *
6947 * Also the case QName == xmlns:??? is handled independently as a namespace
6948 * definition.
6949 *
6950 * Returns the attribute name, and the value in *value.
6951 */
6952
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006953const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006954xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006955 const xmlChar *name;
6956 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006957
6958 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006959 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006960 name = xmlParseName(ctxt);
6961 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006962 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006963 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006964 return(NULL);
6965 }
6966
6967 /*
6968 * read the value
6969 */
6970 SKIP_BLANKS;
6971 if (RAW == '=') {
6972 NEXT;
6973 SKIP_BLANKS;
6974 val = xmlParseAttValue(ctxt);
6975 ctxt->instate = XML_PARSER_CONTENT;
6976 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006977 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006978 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006979 return(NULL);
6980 }
6981
6982 /*
6983 * Check that xml:lang conforms to the specification
6984 * No more registered as an error, just generate a warning now
6985 * since this was deprecated in XML second edition
6986 */
6987 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6988 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006989 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6990 "Malformed value for xml:lang : %s\n",
6991 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006992 }
6993 }
6994
6995 /*
6996 * Check that xml:space conforms to the specification
6997 */
6998 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6999 if (xmlStrEqual(val, BAD_CAST "default"))
7000 *(ctxt->space) = 0;
7001 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7002 *(ctxt->space) = 1;
7003 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007004 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007005"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007006 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007007 }
7008 }
7009
7010 *value = val;
7011 return(name);
7012}
7013
7014/**
7015 * xmlParseStartTag:
7016 * @ctxt: an XML parser context
7017 *
7018 * parse a start of tag either for rule element or
7019 * EmptyElement. In both case we don't parse the tag closing chars.
7020 *
7021 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7022 *
7023 * [ WFC: Unique Att Spec ]
7024 * No attribute name may appear more than once in the same start-tag or
7025 * empty-element tag.
7026 *
7027 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7028 *
7029 * [ WFC: Unique Att Spec ]
7030 * No attribute name may appear more than once in the same start-tag or
7031 * empty-element tag.
7032 *
7033 * With namespace:
7034 *
7035 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7036 *
7037 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7038 *
7039 * Returns the element name parsed
7040 */
7041
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007042const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007043xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007044 const xmlChar *name;
7045 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007046 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007047 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007048 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007049 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007050 int i;
7051
7052 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007053 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007054
7055 name = xmlParseName(ctxt);
7056 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007057 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007058 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007059 return(NULL);
7060 }
7061
7062 /*
7063 * Now parse the attributes, it ends up with the ending
7064 *
7065 * (S Attribute)* S?
7066 */
7067 SKIP_BLANKS;
7068 GROW;
7069
Daniel Veillard21a0f912001-02-25 19:54:14 +00007070 while ((RAW != '>') &&
7071 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007072 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007073 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007074 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007075
7076 attname = xmlParseAttribute(ctxt, &attvalue);
7077 if ((attname != NULL) && (attvalue != NULL)) {
7078 /*
7079 * [ WFC: Unique Att Spec ]
7080 * No attribute name may appear more than once in the same
7081 * start-tag or empty-element tag.
7082 */
7083 for (i = 0; i < nbatts;i += 2) {
7084 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007085 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007086 xmlFree(attvalue);
7087 goto failed;
7088 }
7089 }
Owen Taylor3473f882001-02-23 17:55:21 +00007090 /*
7091 * Add the pair to atts
7092 */
7093 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007094 maxatts = 22; /* allow for 10 attrs by default */
7095 atts = (const xmlChar **)
7096 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007097 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007098 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007099 if (attvalue != NULL)
7100 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007101 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007102 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007103 ctxt->atts = atts;
7104 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007105 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007106 const xmlChar **n;
7107
Owen Taylor3473f882001-02-23 17:55:21 +00007108 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007109 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007110 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007111 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007112 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007113 if (attvalue != NULL)
7114 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007115 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007116 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007117 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007118 ctxt->atts = atts;
7119 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007120 }
7121 atts[nbatts++] = attname;
7122 atts[nbatts++] = attvalue;
7123 atts[nbatts] = NULL;
7124 atts[nbatts + 1] = NULL;
7125 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007126 if (attvalue != NULL)
7127 xmlFree(attvalue);
7128 }
7129
7130failed:
7131
Daniel Veillard3772de32002-12-17 10:31:45 +00007132 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007133 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7134 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007135 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7137 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007138 }
7139 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007140 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7141 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007142 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7143 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007144 break;
7145 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007146 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007147 GROW;
7148 }
7149
7150 /*
7151 * SAX: Start of Element !
7152 */
7153 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007154 (!ctxt->disableSAX)) {
7155 if (nbatts > 0)
7156 ctxt->sax->startElement(ctxt->userData, name, atts);
7157 else
7158 ctxt->sax->startElement(ctxt->userData, name, NULL);
7159 }
Owen Taylor3473f882001-02-23 17:55:21 +00007160
7161 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007162 /* Free only the content strings */
7163 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007164 if (atts[i] != NULL)
7165 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007166 }
7167 return(name);
7168}
7169
7170/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007171 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007172 * @ctxt: an XML parser context
 * @line:  line of the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007175 *
7176 * parse an end of tag
7177 *
7178 * [42] ETag ::= '</' Name S? '>'
7179 *
7180 * With namespace
7181 *
7182 * [NS 9] ETag ::= '</' QName S? '>'
7183 */
7184
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007185static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007186xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007187 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007188
7189 GROW;
7190 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007191 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007192 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007193 return;
7194 }
7195 SKIP(2);
7196
Daniel Veillard46de64e2002-05-29 08:21:33 +00007197 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007198
7199 /*
7200 * We should definitely be at the ending "S? '>'" part
7201 */
7202 GROW;
7203 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007204 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007205 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007206 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007207 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007208
7209 /*
7210 * [ WFC: Element Type Match ]
7211 * The Name in an element's end-tag must match the element type in the
7212 * start-tag.
7213 *
7214 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007215 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007216 if (name == NULL) name = BAD_CAST "unparseable";
7217 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007218 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007219 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007220 }
7221
7222 /*
7223 * SAX: End of Tag
7224 */
7225 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7226 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007227 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007228
Daniel Veillarde57ec792003-09-10 10:50:59 +00007229 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007230 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007231 return;
7232}
7233
7234/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007235 * xmlParseEndTag:
7236 * @ctxt: an XML parser context
7237 *
7238 * parse an end of tag
7239 *
7240 * [42] ETag ::= '</' Name S? '>'
7241 *
7242 * With namespace
7243 *
7244 * [NS 9] ETag ::= '</' QName S? '>'
7245 */
7246
7247void
7248xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007249 xmlParseEndTag1(ctxt, 0);
7250}
Daniel Veillard81273902003-09-30 00:43:48 +00007251#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252
7253/************************************************************************
7254 * *
7255 * SAX 2 specific operations *
7256 * *
7257 ************************************************************************/
7258
7259static const xmlChar *
7260xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7261 int len = 0, l;
7262 int c;
7263 int count = 0;
7264
7265 /*
7266 * Handler for more complex cases
7267 */
7268 GROW;
7269 c = CUR_CHAR(l);
7270 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007271 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007272 return(NULL);
7273 }
7274
7275 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007276 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007277 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007278 (IS_COMBINING(c)) ||
7279 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007280 if (count++ > 100) {
7281 count = 0;
7282 GROW;
7283 }
7284 len += l;
7285 NEXTL(l);
7286 c = CUR_CHAR(l);
7287 }
7288 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7289}
7290
7291/*
7292 * xmlGetNamespace:
7293 * @ctxt: an XML parser context
7294 * @prefix: the prefix to lookup
7295 *
 * Lookup the namespace name for the @prefix (which can be NULL)
 * The prefix must come from the @ctxt->dict dictionary
7298 *
7299 * Returns the namespace name or NULL if not bound
7300 */
7301static const xmlChar *
7302xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7303 int i;
7304
Daniel Veillarde57ec792003-09-10 10:50:59 +00007305 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007306 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007307 if (ctxt->nsTab[i] == prefix) {
7308 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7309 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007310 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007311 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007312 return(NULL);
7313}
7314
7315/**
7316 * xmlParseNCName:
7317 * @ctxt: an XML parser context
 *
 * parse an XML NCName (a name containing no colon).
7321 *
7322 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7323 * CombiningChar | Extender
7324 *
7325 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7326 *
7327 * Returns the Name parsed or NULL
7328 */
7329
7330static const xmlChar *
7331xmlParseNCName(xmlParserCtxtPtr ctxt) {
7332 const xmlChar *in;
7333 const xmlChar *ret;
7334 int count = 0;
7335
7336 /*
7337 * Accelerator for simple ASCII names
7338 */
7339 in = ctxt->input->cur;
7340 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7341 ((*in >= 0x41) && (*in <= 0x5A)) ||
7342 (*in == '_')) {
7343 in++;
7344 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7345 ((*in >= 0x41) && (*in <= 0x5A)) ||
7346 ((*in >= 0x30) && (*in <= 0x39)) ||
7347 (*in == '_') || (*in == '-') ||
7348 (*in == '.'))
7349 in++;
7350 if ((*in > 0) && (*in < 0x80)) {
7351 count = in - ctxt->input->cur;
7352 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7353 ctxt->input->cur = in;
7354 ctxt->nbChars += count;
7355 ctxt->input->col += count;
7356 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007357 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007358 }
7359 return(ret);
7360 }
7361 }
7362 return(xmlParseNCNameComplex(ctxt));
7363}
7364
7365/**
7366 * xmlParseQName:
7367 * @ctxt: an XML parser context
7368 * @prefix: pointer to store the prefix part
7369 *
7370 * parse an XML Namespace QName
7371 *
7372 * [6] QName ::= (Prefix ':')? LocalPart
7373 * [7] Prefix ::= NCName
7374 * [8] LocalPart ::= NCName
7375 *
7376 * Returns the Name parsed or NULL
7377 */
7378
7379static const xmlChar *
7380xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7381 const xmlChar *l, *p;
7382
7383 GROW;
7384
7385 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007386 if (l == NULL) {
7387 if (CUR == ':') {
7388 l = xmlParseName(ctxt);
7389 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007390 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7391 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007392 *prefix = NULL;
7393 return(l);
7394 }
7395 }
7396 return(NULL);
7397 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007398 if (CUR == ':') {
7399 NEXT;
7400 p = l;
7401 l = xmlParseNCName(ctxt);
7402 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007403 xmlChar *tmp;
7404
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007405 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7406 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007407 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7408 p = xmlDictLookup(ctxt->dict, tmp, -1);
7409 if (tmp != NULL) xmlFree(tmp);
7410 *prefix = NULL;
7411 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007412 }
7413 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007414 xmlChar *tmp;
7415
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007416 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7417 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007418 NEXT;
7419 tmp = (xmlChar *) xmlParseName(ctxt);
7420 if (tmp != NULL) {
7421 tmp = xmlBuildQName(tmp, l, NULL, 0);
7422 l = xmlDictLookup(ctxt->dict, tmp, -1);
7423 if (tmp != NULL) xmlFree(tmp);
7424 *prefix = p;
7425 return(l);
7426 }
7427 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7428 l = xmlDictLookup(ctxt->dict, tmp, -1);
7429 if (tmp != NULL) xmlFree(tmp);
7430 *prefix = p;
7431 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007432 }
7433 *prefix = p;
7434 } else
7435 *prefix = NULL;
7436 return(l);
7437}
7438
7439/**
7440 * xmlParseQNameAndCompare:
7441 * @ctxt: an XML parser context
7442 * @name: the localname
7443 * @prefix: the prefix, if any.
7444 *
7445 * parse an XML name and compares for match
7446 * (specialized for endtag parsing)
7447 *
7448 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7449 * and the name for mismatch
7450 */
7451
7452static const xmlChar *
7453xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7454 xmlChar const *prefix) {
7455 const xmlChar *cmp = name;
7456 const xmlChar *in;
7457 const xmlChar *ret;
7458 const xmlChar *prefix2;
7459
7460 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7461
7462 GROW;
7463 in = ctxt->input->cur;
7464
7465 cmp = prefix;
7466 while (*in != 0 && *in == *cmp) {
7467 ++in;
7468 ++cmp;
7469 }
7470 if ((*cmp == 0) && (*in == ':')) {
7471 in++;
7472 cmp = name;
7473 while (*in != 0 && *in == *cmp) {
7474 ++in;
7475 ++cmp;
7476 }
William M. Brack76e95df2003-10-18 16:20:14 +00007477 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007478 /* success */
7479 ctxt->input->cur = in;
7480 return((const xmlChar*) 1);
7481 }
7482 }
7483 /*
7484 * all strings coms from the dictionary, equality can be done directly
7485 */
7486 ret = xmlParseQName (ctxt, &prefix2);
7487 if ((ret == name) && (prefix == prefix2))
7488 return((const xmlChar*) 1);
7489 return ret;
7490}
7491
7492/**
7493 * xmlParseAttValueInternal:
7494 * @ctxt: an XML parser context
7495 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007496 * @alloc: whether the attribute was reallocated as a new string
7497 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007498 *
7499 * parse a value for an attribute.
7500 * NOTE: if no normalization is needed, the routine will return pointers
7501 * directly from the data buffer.
7502 *
7503 * 3.3.3 Attribute-Value Normalization:
7504 * Before the value of an attribute is passed to the application or
7505 * checked for validity, the XML processor must normalize it as follows:
7506 * - a character reference is processed by appending the referenced
7507 * character to the attribute value
7508 * - an entity reference is processed by recursively processing the
7509 * replacement text of the entity
7510 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7511 * appending #x20 to the normalized value, except that only a single
7512 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7513 * parsed entity or the literal entity value of an internal parsed entity
7514 * - other characters are processed by appending them to the normalized value
7515 * If the declared value is not CDATA, then the XML processor must further
7516 * process the normalized attribute value by discarding any leading and
7517 * trailing space (#x20) characters, and by replacing sequences of space
7518 * (#x20) characters by a single space (#x20) character.
7519 * All attributes for which no declaration has been read should be treated
7520 * by a non-validating parser as if declared CDATA.
7521 *
7522 * Returns the AttValue parsed or NULL. The value has to be freed by the
7523 * caller if it was copied, this can be detected by val[*len] == 0.
7524 */
7525
7526static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007527xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7528 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007529{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007530 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007531 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007532 xmlChar *ret = NULL;
7533
7534 GROW;
7535 in = (xmlChar *) CUR_PTR;
7536 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007537 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007538 return (NULL);
7539 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007540 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007541
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007542 /*
7543 * try to handle in this routine the most common case where no
7544 * allocation of a new string is required and where content is
7545 * pure ASCII.
7546 */
7547 limit = *in++;
7548 end = ctxt->input->end;
7549 start = in;
7550 if (in >= end) {
7551 const xmlChar *oldbase = ctxt->input->base;
7552 GROW;
7553 if (oldbase != ctxt->input->base) {
7554 long delta = ctxt->input->base - oldbase;
7555 start = start + delta;
7556 in = in + delta;
7557 }
7558 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007559 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007560 if (normalize) {
7561 /*
7562 * Skip any leading spaces
7563 */
7564 while ((in < end) && (*in != limit) &&
7565 ((*in == 0x20) || (*in == 0x9) ||
7566 (*in == 0xA) || (*in == 0xD))) {
7567 in++;
7568 start = in;
7569 if (in >= end) {
7570 const xmlChar *oldbase = ctxt->input->base;
7571 GROW;
7572 if (oldbase != ctxt->input->base) {
7573 long delta = ctxt->input->base - oldbase;
7574 start = start + delta;
7575 in = in + delta;
7576 }
7577 end = ctxt->input->end;
7578 }
7579 }
7580 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7581 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7582 if ((*in++ == 0x20) && (*in == 0x20)) break;
7583 if (in >= end) {
7584 const xmlChar *oldbase = ctxt->input->base;
7585 GROW;
7586 if (oldbase != ctxt->input->base) {
7587 long delta = ctxt->input->base - oldbase;
7588 start = start + delta;
7589 in = in + delta;
7590 }
7591 end = ctxt->input->end;
7592 }
7593 }
7594 last = in;
7595 /*
7596 * skip the trailing blanks
7597 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007598 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007599 while ((in < end) && (*in != limit) &&
7600 ((*in == 0x20) || (*in == 0x9) ||
7601 (*in == 0xA) || (*in == 0xD))) {
7602 in++;
7603 if (in >= end) {
7604 const xmlChar *oldbase = ctxt->input->base;
7605 GROW;
7606 if (oldbase != ctxt->input->base) {
7607 long delta = ctxt->input->base - oldbase;
7608 start = start + delta;
7609 in = in + delta;
7610 last = last + delta;
7611 }
7612 end = ctxt->input->end;
7613 }
7614 }
7615 if (*in != limit) goto need_complex;
7616 } else {
7617 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7618 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7619 in++;
7620 if (in >= end) {
7621 const xmlChar *oldbase = ctxt->input->base;
7622 GROW;
7623 if (oldbase != ctxt->input->base) {
7624 long delta = ctxt->input->base - oldbase;
7625 start = start + delta;
7626 in = in + delta;
7627 }
7628 end = ctxt->input->end;
7629 }
7630 }
7631 last = in;
7632 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007634 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007635 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007636 *len = last - start;
7637 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007639 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007640 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007641 }
7642 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007643 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007645need_complex:
7646 if (alloc) *alloc = 1;
7647 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007648}
7649
7650/**
7651 * xmlParseAttribute2:
7652 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007653 * @pref: the element prefix
7654 * @elem: the element name
7655 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007657 * @len: an int * to save the length of the attribute
7658 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007659 *
7660 * parse an attribute in the new SAX2 framework.
7661 *
 * Returns the attribute name, and the value in *value.
7663 */
7664
7665static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007666xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7667 const xmlChar *pref, const xmlChar *elem,
7668 const xmlChar **prefix, xmlChar **value,
7669 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007670 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007671 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007672 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007673
7674 *value = NULL;
7675 GROW;
7676 name = xmlParseQName(ctxt, prefix);
7677 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7679 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007680 return(NULL);
7681 }
7682
7683 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007684 * get the type if needed
7685 */
7686 if (ctxt->attsSpecial != NULL) {
7687 int type;
7688
7689 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7690 pref, elem, *prefix, name);
7691 if (type != 0) normalize = 1;
7692 }
7693
7694 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007695 * read the value
7696 */
7697 SKIP_BLANKS;
7698 if (RAW == '=') {
7699 NEXT;
7700 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007701 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 ctxt->instate = XML_PARSER_CONTENT;
7703 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007704 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007705 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007706 return(NULL);
7707 }
7708
Daniel Veillardd8925572005-06-08 22:34:55 +00007709 if (*prefix == ctxt->str_xml) {
7710 /*
7711 * Check that xml:lang conforms to the specification
7712 * No more registered as an error, just generate a warning now
7713 * since this was deprecated in XML second edition
7714 */
7715 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7716 internal_val = xmlStrndup(val, *len);
7717 if (!xmlCheckLanguageID(internal_val)) {
7718 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7719 "Malformed value for xml:lang : %s\n",
7720 internal_val, NULL);
7721 }
7722 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007723
Daniel Veillardd8925572005-06-08 22:34:55 +00007724 /*
7725 * Check that xml:space conforms to the specification
7726 */
7727 if (xmlStrEqual(name, BAD_CAST "space")) {
7728 internal_val = xmlStrndup(val, *len);
7729 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7730 *(ctxt->space) = 0;
7731 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7732 *(ctxt->space) = 1;
7733 else {
7734 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007735"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007736 internal_val, NULL);
7737 }
7738 }
7739 if (internal_val) {
7740 xmlFree(internal_val);
7741 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007742 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007743
7744 *value = val;
7745 return(name);
7746}
7747
7748/**
7749 * xmlParseStartTag2:
7750 * @ctxt: an XML parser context
7751 *
7752 * parse a start of tag either for rule element or
7753 * EmptyElement. In both case we don't parse the tag closing chars.
7754 * This routine is called when running SAX2 parsing
7755 *
7756 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7757 *
7758 * [ WFC: Unique Att Spec ]
7759 * No attribute name may appear more than once in the same start-tag or
7760 * empty-element tag.
7761 *
7762 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7763 *
7764 * [ WFC: Unique Att Spec ]
7765 * No attribute name may appear more than once in the same start-tag or
7766 * empty-element tag.
7767 *
7768 * With namespace:
7769 *
7770 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7771 *
7772 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7773 *
7774 * Returns the element name parsed
7775 */
7776
7777static const xmlChar *
7778xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007779 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007780 const xmlChar *localname;
7781 const xmlChar *prefix;
7782 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007783 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007784 const xmlChar *nsname;
7785 xmlChar *attvalue;
7786 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007788 int nratts, nbatts, nbdef;
7789 int i, j, nbNs, attval;
7790 const xmlChar *base;
7791 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007792 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793
7794 if (RAW != '<') return(NULL);
7795 NEXT1;
7796
7797 /*
7798 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7799 * point since the attribute values may be stored as pointers to
7800 * the buffer and calling SHRINK would destroy them !
7801 * The Shrinking is only possible once the full set of attribute
7802 * callbacks have been done.
7803 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007804reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007805 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007806 base = ctxt->input->base;
7807 cur = ctxt->input->cur - ctxt->input->base;
7808 nbatts = 0;
7809 nratts = 0;
7810 nbdef = 0;
7811 nbNs = 0;
7812 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007813 /* Forget any namespaces added during an earlier parse of this element. */
7814 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815
7816 localname = xmlParseQName(ctxt, &prefix);
7817 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007818 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7819 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007820 return(NULL);
7821 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007822 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007823
7824 /*
7825 * Now parse the attributes, it ends up with the ending
7826 *
7827 * (S Attribute)* S?
7828 */
7829 SKIP_BLANKS;
7830 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007831 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007832
7833 while ((RAW != '>') &&
7834 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007835 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007836 const xmlChar *q = CUR_PTR;
7837 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007838 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007839
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007840 attname = xmlParseAttribute2(ctxt, prefix, localname,
7841 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007842 if ((attname != NULL) && (attvalue != NULL)) {
7843 if (len < 0) len = xmlStrlen(attvalue);
7844 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007845 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7846 xmlURIPtr uri;
7847
7848 if (*URL != 0) {
7849 uri = xmlParseURI((const char *) URL);
7850 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007851 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7852 "xmlns: %s not a valid URI\n",
7853 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007854 } else {
7855 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007856 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7857 "xmlns: URI %s is not absolute\n",
7858 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007859 }
7860 xmlFreeURI(uri);
7861 }
7862 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007864 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007865 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007866 for (j = 1;j <= nbNs;j++)
7867 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7868 break;
7869 if (j <= nbNs)
7870 xmlErrAttributeDup(ctxt, NULL, attname);
7871 else
7872 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007873 if (alloc != 0) xmlFree(attvalue);
7874 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007875 continue;
7876 }
7877 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007878 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7879 xmlURIPtr uri;
7880
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007881 if (attname == ctxt->str_xml) {
7882 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007883 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7884 "xml namespace prefix mapped to wrong URI\n",
7885 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007886 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007887 /*
7888 * Do not keep a namespace definition node
7889 */
7890 if (alloc != 0) xmlFree(attvalue);
7891 SKIP_BLANKS;
7892 continue;
7893 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007894 uri = xmlParseURI((const char *) URL);
7895 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007896 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7897 "xmlns:%s: '%s' is not a valid URI\n",
7898 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007899 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007900 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007901 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7902 "xmlns:%s: URI %s is not absolute\n",
7903 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007904 }
7905 xmlFreeURI(uri);
7906 }
7907
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007909 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007910 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007911 for (j = 1;j <= nbNs;j++)
7912 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7913 break;
7914 if (j <= nbNs)
7915 xmlErrAttributeDup(ctxt, aprefix, attname);
7916 else
7917 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007918 if (alloc != 0) xmlFree(attvalue);
7919 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007920 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007921 continue;
7922 }
7923
7924 /*
7925 * Add the pair to atts
7926 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007927 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7928 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 if (attvalue[len] == 0)
7930 xmlFree(attvalue);
7931 goto failed;
7932 }
7933 maxatts = ctxt->maxatts;
7934 atts = ctxt->atts;
7935 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007936 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007937 atts[nbatts++] = attname;
7938 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007939 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 atts[nbatts++] = attvalue;
7941 attvalue += len;
7942 atts[nbatts++] = attvalue;
7943 /*
7944 * tag if some deallocation is needed
7945 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007946 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007947 } else {
7948 if ((attvalue != NULL) && (attvalue[len] == 0))
7949 xmlFree(attvalue);
7950 }
7951
7952failed:
7953
7954 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007955 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007956 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7957 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007958 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7960 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007961 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 }
7963 SKIP_BLANKS;
7964 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7965 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007966 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007967 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 break;
7969 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007971 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007972 }
7973
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007975 * The attributes defaulting
7976 */
7977 if (ctxt->attsDefault != NULL) {
7978 xmlDefAttrsPtr defaults;
7979
7980 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7981 if (defaults != NULL) {
7982 for (i = 0;i < defaults->nbAttrs;i++) {
7983 attname = defaults->values[4 * i];
7984 aprefix = defaults->values[4 * i + 1];
7985
7986 /*
7987 * special work for namespaces defaulted defs
7988 */
7989 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7990 /*
7991 * check that it's not a defined namespace
7992 */
7993 for (j = 1;j <= nbNs;j++)
7994 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7995 break;
7996 if (j <= nbNs) continue;
7997
7998 nsname = xmlGetNamespace(ctxt, NULL);
7999 if (nsname != defaults->values[4 * i + 2]) {
8000 if (nsPush(ctxt, NULL,
8001 defaults->values[4 * i + 2]) > 0)
8002 nbNs++;
8003 }
8004 } else if (aprefix == ctxt->str_xmlns) {
8005 /*
8006 * check that it's not a defined namespace
8007 */
8008 for (j = 1;j <= nbNs;j++)
8009 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8010 break;
8011 if (j <= nbNs) continue;
8012
8013 nsname = xmlGetNamespace(ctxt, attname);
8014 if (nsname != defaults->values[2]) {
8015 if (nsPush(ctxt, attname,
8016 defaults->values[4 * i + 2]) > 0)
8017 nbNs++;
8018 }
8019 } else {
8020 /*
8021 * check that it's not a defined attribute
8022 */
8023 for (j = 0;j < nbatts;j+=5) {
8024 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8025 break;
8026 }
8027 if (j < nbatts) continue;
8028
8029 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8030 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008031 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008032 }
8033 maxatts = ctxt->maxatts;
8034 atts = ctxt->atts;
8035 }
8036 atts[nbatts++] = attname;
8037 atts[nbatts++] = aprefix;
8038 if (aprefix == NULL)
8039 atts[nbatts++] = NULL;
8040 else
8041 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8042 atts[nbatts++] = defaults->values[4 * i + 2];
8043 atts[nbatts++] = defaults->values[4 * i + 3];
8044 nbdef++;
8045 }
8046 }
8047 }
8048 }
8049
Daniel Veillarde70c8772003-11-25 07:21:18 +00008050 /*
8051 * The attributes checkings
8052 */
8053 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008054 /*
8055 * The default namespace does not apply to attribute names.
8056 */
8057 if (atts[i + 1] != NULL) {
8058 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8059 if (nsname == NULL) {
8060 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8061 "Namespace prefix %s for %s on %s is not defined\n",
8062 atts[i + 1], atts[i], localname);
8063 }
8064 atts[i + 2] = nsname;
8065 } else
8066 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008067 /*
8068 * [ WFC: Unique Att Spec ]
8069 * No attribute name may appear more than once in the same
8070 * start-tag or empty-element tag.
8071 * As extended by the Namespace in XML REC.
8072 */
8073 for (j = 0; j < i;j += 5) {
8074 if (atts[i] == atts[j]) {
8075 if (atts[i+1] == atts[j+1]) {
8076 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8077 break;
8078 }
8079 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8080 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8081 "Namespaced Attribute %s in '%s' redefined\n",
8082 atts[i], nsname, NULL);
8083 break;
8084 }
8085 }
8086 }
8087 }
8088
Daniel Veillarde57ec792003-09-10 10:50:59 +00008089 nsname = xmlGetNamespace(ctxt, prefix);
8090 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008091 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8092 "Namespace prefix %s on %s is not defined\n",
8093 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008094 }
8095 *pref = prefix;
8096 *URI = nsname;
8097
8098 /*
8099 * SAX: Start of Element !
8100 */
8101 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8102 (!ctxt->disableSAX)) {
8103 if (nbNs > 0)
8104 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8105 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8106 nbatts / 5, nbdef, atts);
8107 else
8108 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8109 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8110 }
8111
8112 /*
8113 * Free up attribute allocated strings if needed
8114 */
8115 if (attval != 0) {
8116 for (i = 3,j = 0; j < nratts;i += 5,j++)
8117 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8118 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008119 }
8120
8121 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008122
8123base_changed:
8124 /*
8125 * the attribute strings are valid iif the base didn't changed
8126 */
8127 if (attval != 0) {
8128 for (i = 3,j = 0; j < nratts;i += 5,j++)
8129 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8130 xmlFree((xmlChar *) atts[i]);
8131 }
8132 ctxt->input->cur = ctxt->input->base + cur;
8133 if (ctxt->wellFormed == 1) {
8134 goto reparse;
8135 }
8136 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008137}
8138
8139/**
8140 * xmlParseEndTag2:
8141 * @ctxt: an XML parser context
8142 * @line: line of the start tag
8143 * @nsNr: number of namespaces on the start tag
8144 *
8145 * parse an end of tag
8146 *
8147 * [42] ETag ::= '</' Name S? '>'
8148 *
8149 * With namespace
8150 *
8151 * [NS 9] ETag ::= '</' QName S? '>'
8152 */
8153
8154static void
8155xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008156 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008157 const xmlChar *name;
8158
8159 GROW;
8160 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008161 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008162 return;
8163 }
8164 SKIP(2);
8165
William M. Brack13dfa872004-09-18 04:52:08 +00008166 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008167 if (ctxt->input->cur[tlen] == '>') {
8168 ctxt->input->cur += tlen + 1;
8169 goto done;
8170 }
8171 ctxt->input->cur += tlen;
8172 name = (xmlChar*)1;
8173 } else {
8174 if (prefix == NULL)
8175 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8176 else
8177 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8178 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179
8180 /*
8181 * We should definitely be at the ending "S? '>'" part
8182 */
8183 GROW;
8184 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008185 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008186 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008187 } else
8188 NEXT1;
8189
8190 /*
8191 * [ WFC: Element Type Match ]
8192 * The Name in an element's end-tag must match the element type in the
8193 * start-tag.
8194 *
8195 */
8196 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008197 if (name == NULL) name = BAD_CAST "unparseable";
8198 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008199 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008200 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 }
8202
8203 /*
8204 * SAX: End of Tag
8205 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008206done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8208 (!ctxt->disableSAX))
8209 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8210
Daniel Veillard0fb18932003-09-07 09:14:37 +00008211 spacePop(ctxt);
8212 if (nsNr != 0)
8213 nsPop(ctxt, nsNr);
8214 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008215}
8216
8217/**
Owen Taylor3473f882001-02-23 17:55:21 +00008218 * xmlParseCDSect:
8219 * @ctxt: an XML parser context
8220 *
8221 * Parse escaped pure raw content.
8222 *
8223 * [18] CDSect ::= CDStart CData CDEnd
8224 *
8225 * [19] CDStart ::= '<![CDATA['
8226 *
8227 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8228 *
8229 * [21] CDEnd ::= ']]>'
8230 */
8231void
8232xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8233 xmlChar *buf = NULL;
8234 int len = 0;
8235 int size = XML_PARSER_BUFFER_SIZE;
8236 int r, rl;
8237 int s, sl;
8238 int cur, l;
8239 int count = 0;
8240
Daniel Veillard8f597c32003-10-06 08:19:27 +00008241 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008242 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008243 SKIP(9);
8244 } else
8245 return;
8246
8247 ctxt->instate = XML_PARSER_CDATA_SECTION;
8248 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008249 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008250 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008251 ctxt->instate = XML_PARSER_CONTENT;
8252 return;
8253 }
8254 NEXTL(rl);
8255 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008256 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008257 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008258 ctxt->instate = XML_PARSER_CONTENT;
8259 return;
8260 }
8261 NEXTL(sl);
8262 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008263 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008264 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008265 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008266 return;
8267 }
William M. Brack871611b2003-10-18 04:53:14 +00008268 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008269 ((r != ']') || (s != ']') || (cur != '>'))) {
8270 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008271 xmlChar *tmp;
8272
Owen Taylor3473f882001-02-23 17:55:21 +00008273 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008274 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8275 if (tmp == NULL) {
8276 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008277 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008278 return;
8279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008281 }
8282 COPY_BUF(rl,buf,len,r);
8283 r = s;
8284 rl = sl;
8285 s = cur;
8286 sl = l;
8287 count++;
8288 if (count > 50) {
8289 GROW;
8290 count = 0;
8291 }
8292 NEXTL(l);
8293 cur = CUR_CHAR(l);
8294 }
8295 buf[len] = 0;
8296 ctxt->instate = XML_PARSER_CONTENT;
8297 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008298 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008299 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008300 xmlFree(buf);
8301 return;
8302 }
8303 NEXTL(l);
8304
8305 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008306 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008307 */
8308 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8309 if (ctxt->sax->cdataBlock != NULL)
8310 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008311 else if (ctxt->sax->characters != NULL)
8312 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008313 }
8314 xmlFree(buf);
8315}
8316
8317/**
8318 * xmlParseContent:
8319 * @ctxt: an XML parser context
8320 *
8321 * Parse a content:
8322 *
8323 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8324 */
8325
8326void
8327xmlParseContent(xmlParserCtxtPtr ctxt) {
8328 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008329 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008330 ((RAW != '<') || (NXT(1) != '/'))) {
8331 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008332 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008333 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008334
8335 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008336 * First case : a Processing Instruction.
8337 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008338 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008339 xmlParsePI(ctxt);
8340 }
8341
8342 /*
8343 * Second case : a CDSection
8344 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008345 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008346 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008347 xmlParseCDSect(ctxt);
8348 }
8349
8350 /*
8351 * Third case : a comment
8352 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008353 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008354 (NXT(2) == '-') && (NXT(3) == '-')) {
8355 xmlParseComment(ctxt);
8356 ctxt->instate = XML_PARSER_CONTENT;
8357 }
8358
8359 /*
8360 * Fourth case : a sub-element.
8361 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008362 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008363 xmlParseElement(ctxt);
8364 }
8365
8366 /*
8367 * Fifth case : a reference. If if has not been resolved,
8368 * parsing returns it's Name, create the node
8369 */
8370
Daniel Veillard21a0f912001-02-25 19:54:14 +00008371 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008372 xmlParseReference(ctxt);
8373 }
8374
8375 /*
8376 * Last case, text. Note that References are handled directly.
8377 */
8378 else {
8379 xmlParseCharData(ctxt, 0);
8380 }
8381
8382 GROW;
8383 /*
8384 * Pop-up of finished entities.
8385 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008386 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008387 xmlPopInput(ctxt);
8388 SHRINK;
8389
Daniel Veillardfdc91562002-07-01 21:52:03 +00008390 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008391 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8392 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008393 ctxt->instate = XML_PARSER_EOF;
8394 break;
8395 }
8396 }
8397}
8398
8399/**
8400 * xmlParseElement:
8401 * @ctxt: an XML parser context
8402 *
8403 * parse an XML element, this is highly recursive
8404 *
8405 * [39] element ::= EmptyElemTag | STag content ETag
8406 *
8407 * [ WFC: Element Type Match ]
8408 * The Name in an element's end-tag must match the element type in the
8409 * start-tag.
8410 *
Owen Taylor3473f882001-02-23 17:55:21 +00008411 */
8412
8413void
8414xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008415 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008416 const xmlChar *prefix;
8417 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008418 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008419 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008420 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008421 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008422
8423 /* Capture start position */
8424 if (ctxt->record_info) {
8425 node_info.begin_pos = ctxt->input->consumed +
8426 (CUR_PTR - ctxt->input->base);
8427 node_info.begin_line = ctxt->input->line;
8428 }
8429
8430 if (ctxt->spaceNr == 0)
8431 spacePush(ctxt, -1);
8432 else
8433 spacePush(ctxt, *ctxt->space);
8434
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008435 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008436#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008437 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008438#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008439 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008440#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008441 else
8442 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008443#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008444 if (name == NULL) {
8445 spacePop(ctxt);
8446 return;
8447 }
8448 namePush(ctxt, name);
8449 ret = ctxt->node;
8450
Daniel Veillard4432df22003-09-28 18:58:27 +00008451#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008452 /*
8453 * [ VC: Root Element Type ]
8454 * The Name in the document type declaration must match the element
8455 * type of the root element.
8456 */
8457 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8458 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8459 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008460#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008461
8462 /*
8463 * Check for an Empty Element.
8464 */
8465 if ((RAW == '/') && (NXT(1) == '>')) {
8466 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008467 if (ctxt->sax2) {
8468 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8469 (!ctxt->disableSAX))
8470 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008471#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008472 } else {
8473 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8474 (!ctxt->disableSAX))
8475 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008476#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008477 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008478 namePop(ctxt);
8479 spacePop(ctxt);
8480 if (nsNr != ctxt->nsNr)
8481 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008482 if ( ret != NULL && ctxt->record_info ) {
8483 node_info.end_pos = ctxt->input->consumed +
8484 (CUR_PTR - ctxt->input->base);
8485 node_info.end_line = ctxt->input->line;
8486 node_info.node = ret;
8487 xmlParserAddNodeInfo(ctxt, &node_info);
8488 }
8489 return;
8490 }
8491 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008492 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008493 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008494 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8495 "Couldn't find end of Start Tag %s line %d\n",
8496 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008497
8498 /*
8499 * end of parsing of this node.
8500 */
8501 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008502 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008503 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008504 if (nsNr != ctxt->nsNr)
8505 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008506
8507 /*
8508 * Capture end position and add node
8509 */
8510 if ( ret != NULL && ctxt->record_info ) {
8511 node_info.end_pos = ctxt->input->consumed +
8512 (CUR_PTR - ctxt->input->base);
8513 node_info.end_line = ctxt->input->line;
8514 node_info.node = ret;
8515 xmlParserAddNodeInfo(ctxt, &node_info);
8516 }
8517 return;
8518 }
8519
8520 /*
8521 * Parse the content of the element:
8522 */
8523 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008524 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008525 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008526 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008527 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008528
8529 /*
8530 * end of parsing of this node.
8531 */
8532 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008533 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008534 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008535 if (nsNr != ctxt->nsNr)
8536 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008537 return;
8538 }
8539
8540 /*
8541 * parse the end of tag: '</' should be here.
8542 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008543 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008544 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008545 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008546 }
8547#ifdef LIBXML_SAX1_ENABLED
8548 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008549 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008550#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008551
8552 /*
8553 * Capture end position and add node
8554 */
8555 if ( ret != NULL && ctxt->record_info ) {
8556 node_info.end_pos = ctxt->input->consumed +
8557 (CUR_PTR - ctxt->input->base);
8558 node_info.end_line = ctxt->input->line;
8559 node_info.node = ret;
8560 xmlParserAddNodeInfo(ctxt, &node_info);
8561 }
8562}
8563
8564/**
8565 * xmlParseVersionNum:
8566 * @ctxt: an XML parser context
8567 *
8568 * parse the XML version value.
8569 *
8570 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8571 *
8572 * Returns the string giving the XML version number, or NULL
8573 */
8574xmlChar *
8575xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8576 xmlChar *buf = NULL;
8577 int len = 0;
8578 int size = 10;
8579 xmlChar cur;
8580
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008581 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008582 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008583 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008584 return(NULL);
8585 }
8586 cur = CUR;
8587 while (((cur >= 'a') && (cur <= 'z')) ||
8588 ((cur >= 'A') && (cur <= 'Z')) ||
8589 ((cur >= '0') && (cur <= '9')) ||
8590 (cur == '_') || (cur == '.') ||
8591 (cur == ':') || (cur == '-')) {
8592 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008593 xmlChar *tmp;
8594
Owen Taylor3473f882001-02-23 17:55:21 +00008595 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008596 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8597 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 return(NULL);
8600 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008601 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008602 }
8603 buf[len++] = cur;
8604 NEXT;
8605 cur=CUR;
8606 }
8607 buf[len] = 0;
8608 return(buf);
8609}
8610
8611/**
8612 * xmlParseVersionInfo:
8613 * @ctxt: an XML parser context
8614 *
8615 * parse the XML version.
8616 *
8617 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8618 *
8619 * [25] Eq ::= S? '=' S?
8620 *
8621 * Returns the version string, e.g. "1.0"
8622 */
8623
8624xmlChar *
8625xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8626 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008627
Daniel Veillarda07050d2003-10-19 14:46:32 +00008628 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008629 SKIP(7);
8630 SKIP_BLANKS;
8631 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008632 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008633 return(NULL);
8634 }
8635 NEXT;
8636 SKIP_BLANKS;
8637 if (RAW == '"') {
8638 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008639 version = xmlParseVersionNum(ctxt);
8640 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008641 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008642 } else
8643 NEXT;
8644 } else if (RAW == '\''){
8645 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008646 version = xmlParseVersionNum(ctxt);
8647 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008648 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008649 } else
8650 NEXT;
8651 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008652 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008653 }
8654 }
8655 return(version);
8656}
8657
8658/**
8659 * xmlParseEncName:
8660 * @ctxt: an XML parser context
8661 *
8662 * parse the XML encoding name
8663 *
8664 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8665 *
8666 * Returns the encoding name value or NULL
8667 */
8668xmlChar *
8669xmlParseEncName(xmlParserCtxtPtr ctxt) {
8670 xmlChar *buf = NULL;
8671 int len = 0;
8672 int size = 10;
8673 xmlChar cur;
8674
8675 cur = CUR;
8676 if (((cur >= 'a') && (cur <= 'z')) ||
8677 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008678 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008679 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008680 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008681 return(NULL);
8682 }
8683
8684 buf[len++] = cur;
8685 NEXT;
8686 cur = CUR;
8687 while (((cur >= 'a') && (cur <= 'z')) ||
8688 ((cur >= 'A') && (cur <= 'Z')) ||
8689 ((cur >= '0') && (cur <= '9')) ||
8690 (cur == '.') || (cur == '_') ||
8691 (cur == '-')) {
8692 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008693 xmlChar *tmp;
8694
Owen Taylor3473f882001-02-23 17:55:21 +00008695 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008696 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8697 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008699 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008700 return(NULL);
8701 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008702 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008703 }
8704 buf[len++] = cur;
8705 NEXT;
8706 cur = CUR;
8707 if (cur == 0) {
8708 SHRINK;
8709 GROW;
8710 cur = CUR;
8711 }
8712 }
8713 buf[len] = 0;
8714 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008715 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008716 }
8717 return(buf);
8718}
8719
8720/**
8721 * xmlParseEncodingDecl:
8722 * @ctxt: an XML parser context
8723 *
8724 * parse the XML encoding declaration
8725 *
8726 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8727 *
8728 * this setups the conversion filters.
8729 *
8730 * Returns the encoding value or NULL
8731 */
8732
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008733const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008734xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8735 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008736
8737 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008738 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008739 SKIP(8);
8740 SKIP_BLANKS;
8741 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008742 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008743 return(NULL);
8744 }
8745 NEXT;
8746 SKIP_BLANKS;
8747 if (RAW == '"') {
8748 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008749 encoding = xmlParseEncName(ctxt);
8750 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008751 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008752 } else
8753 NEXT;
8754 } else if (RAW == '\''){
8755 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008756 encoding = xmlParseEncName(ctxt);
8757 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008758 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008759 } else
8760 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008761 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008762 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008763 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008764 /*
8765 * UTF-16 encoding stwich has already taken place at this stage,
8766 * more over the little-endian/big-endian selection is already done
8767 */
8768 if ((encoding != NULL) &&
8769 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8770 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008771 if (ctxt->encoding != NULL)
8772 xmlFree((xmlChar *) ctxt->encoding);
8773 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008774 }
8775 /*
8776 * UTF-8 encoding is handled natively
8777 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008778 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008779 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8780 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008781 if (ctxt->encoding != NULL)
8782 xmlFree((xmlChar *) ctxt->encoding);
8783 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008784 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008785 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008786 xmlCharEncodingHandlerPtr handler;
8787
8788 if (ctxt->input->encoding != NULL)
8789 xmlFree((xmlChar *) ctxt->input->encoding);
8790 ctxt->input->encoding = encoding;
8791
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008792 handler = xmlFindCharEncodingHandler((const char *) encoding);
8793 if (handler != NULL) {
8794 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008795 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008796 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008797 "Unsupported encoding %s\n", encoding);
8798 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008799 }
8800 }
8801 }
8802 return(encoding);
8803}
8804
8805/**
8806 * xmlParseSDDecl:
8807 * @ctxt: an XML parser context
8808 *
8809 * parse the XML standalone declaration
8810 *
8811 * [32] SDDecl ::= S 'standalone' Eq
8812 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8813 *
8814 * [ VC: Standalone Document Declaration ]
8815 * TODO The standalone document declaration must have the value "no"
8816 * if any external markup declarations contain declarations of:
8817 * - attributes with default values, if elements to which these
8818 * attributes apply appear in the document without specifications
8819 * of values for these attributes, or
8820 * - entities (other than amp, lt, gt, apos, quot), if references
8821 * to those entities appear in the document, or
8822 * - attributes with values subject to normalization, where the
8823 * attribute appears in the document with a value which will change
8824 * as a result of normalization, or
8825 * - element types with element content, if white space occurs directly
8826 * within any instance of those types.
8827 *
8828 * Returns 1 if standalone, 0 otherwise
8829 */
8830
8831int
8832xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8833 int standalone = -1;
8834
8835 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008836 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008837 SKIP(10);
8838 SKIP_BLANKS;
8839 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008840 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008841 return(standalone);
8842 }
8843 NEXT;
8844 SKIP_BLANKS;
8845 if (RAW == '\''){
8846 NEXT;
8847 if ((RAW == 'n') && (NXT(1) == 'o')) {
8848 standalone = 0;
8849 SKIP(2);
8850 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8851 (NXT(2) == 's')) {
8852 standalone = 1;
8853 SKIP(3);
8854 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008855 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008856 }
8857 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008858 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008859 } else
8860 NEXT;
8861 } else if (RAW == '"'){
8862 NEXT;
8863 if ((RAW == 'n') && (NXT(1) == 'o')) {
8864 standalone = 0;
8865 SKIP(2);
8866 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8867 (NXT(2) == 's')) {
8868 standalone = 1;
8869 SKIP(3);
8870 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008871 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008872 }
8873 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008874 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008875 } else
8876 NEXT;
8877 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008878 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008879 }
8880 }
8881 return(standalone);
8882}
8883
8884/**
8885 * xmlParseXMLDecl:
8886 * @ctxt: an XML parser context
8887 *
8888 * parse an XML declaration header
8889 *
8890 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8891 */
8892
8893void
8894xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8895 xmlChar *version;
8896
8897 /*
8898 * We know that '<?xml' is here.
8899 */
8900 SKIP(5);
8901
William M. Brack76e95df2003-10-18 16:20:14 +00008902 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008903 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8904 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008905 }
8906 SKIP_BLANKS;
8907
8908 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008909 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008910 */
8911 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008912 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008913 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008914 } else {
8915 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8916 /*
8917 * TODO: Blueberry should be detected here
8918 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008919 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8920 "Unsupported version '%s'\n",
8921 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008922 }
8923 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008924 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008925 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008926 }
Owen Taylor3473f882001-02-23 17:55:21 +00008927
8928 /*
8929 * We may have the encoding declaration
8930 */
William M. Brack76e95df2003-10-18 16:20:14 +00008931 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008932 if ((RAW == '?') && (NXT(1) == '>')) {
8933 SKIP(2);
8934 return;
8935 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008936 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008937 }
8938 xmlParseEncodingDecl(ctxt);
8939 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8940 /*
8941 * The XML REC instructs us to stop parsing right here
8942 */
8943 return;
8944 }
8945
8946 /*
8947 * We may have the standalone status.
8948 */
William M. Brack76e95df2003-10-18 16:20:14 +00008949 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008950 if ((RAW == '?') && (NXT(1) == '>')) {
8951 SKIP(2);
8952 return;
8953 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008954 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008955 }
8956 SKIP_BLANKS;
8957 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8958
8959 SKIP_BLANKS;
8960 if ((RAW == '?') && (NXT(1) == '>')) {
8961 SKIP(2);
8962 } else if (RAW == '>') {
8963 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008964 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008965 NEXT;
8966 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008967 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008968 MOVETO_ENDTAG(CUR_PTR);
8969 NEXT;
8970 }
8971}
8972
8973/**
8974 * xmlParseMisc:
8975 * @ctxt: an XML parser context
8976 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008977 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008978 *
8979 * [27] Misc ::= Comment | PI | S
8980 */
8981
8982void
8983xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008984 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008985 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008986 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008987 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008988 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008989 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008990 NEXT;
8991 } else
8992 xmlParseComment(ctxt);
8993 }
8994}
8995
8996/**
8997 * xmlParseDocument:
8998 * @ctxt: an XML parser context
8999 *
9000 * parse an XML document (and build a tree if using the standard SAX
9001 * interface).
9002 *
9003 * [1] document ::= prolog element Misc*
9004 *
9005 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9006 *
9007 * Returns 0, -1 in case of error. the parser context is augmented
9008 * as a result of the parsing.
9009 */
9010
9011int
9012xmlParseDocument(xmlParserCtxtPtr ctxt) {
9013 xmlChar start[4];
9014 xmlCharEncoding enc;
9015
9016 xmlInitParser();
9017
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009018 if ((ctxt == NULL) || (ctxt->input == NULL))
9019 return(-1);
9020
Owen Taylor3473f882001-02-23 17:55:21 +00009021 GROW;
9022
9023 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009024 * SAX: detecting the level.
9025 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009026 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009027
9028 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009029 * SAX: beginning of the document processing.
9030 */
9031 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9032 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9033
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009034 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9035 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009036 /*
9037 * Get the 4 first bytes and decode the charset
9038 * if enc != XML_CHAR_ENCODING_NONE
9039 * plug some encoding conversion routines.
9040 */
9041 start[0] = RAW;
9042 start[1] = NXT(1);
9043 start[2] = NXT(2);
9044 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009045 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009046 if (enc != XML_CHAR_ENCODING_NONE) {
9047 xmlSwitchEncoding(ctxt, enc);
9048 }
Owen Taylor3473f882001-02-23 17:55:21 +00009049 }
9050
9051
9052 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009053 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009054 }
9055
9056 /*
9057 * Check for the XMLDecl in the Prolog.
9058 */
9059 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009060 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009061
9062 /*
9063 * Note that we will switch encoding on the fly.
9064 */
9065 xmlParseXMLDecl(ctxt);
9066 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9067 /*
9068 * The XML REC instructs us to stop parsing right here
9069 */
9070 return(-1);
9071 }
9072 ctxt->standalone = ctxt->input->standalone;
9073 SKIP_BLANKS;
9074 } else {
9075 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9076 }
9077 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9078 ctxt->sax->startDocument(ctxt->userData);
9079
9080 /*
9081 * The Misc part of the Prolog
9082 */
9083 GROW;
9084 xmlParseMisc(ctxt);
9085
9086 /*
9087 * Then possibly doc type declaration(s) and more Misc
9088 * (doctypedecl Misc*)?
9089 */
9090 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009091 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009092
9093 ctxt->inSubset = 1;
9094 xmlParseDocTypeDecl(ctxt);
9095 if (RAW == '[') {
9096 ctxt->instate = XML_PARSER_DTD;
9097 xmlParseInternalSubset(ctxt);
9098 }
9099
9100 /*
9101 * Create and update the external subset.
9102 */
9103 ctxt->inSubset = 2;
9104 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9105 (!ctxt->disableSAX))
9106 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9107 ctxt->extSubSystem, ctxt->extSubURI);
9108 ctxt->inSubset = 0;
9109
9110
9111 ctxt->instate = XML_PARSER_PROLOG;
9112 xmlParseMisc(ctxt);
9113 }
9114
9115 /*
9116 * Time to start parsing the tree itself
9117 */
9118 GROW;
9119 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009120 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9121 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009122 } else {
9123 ctxt->instate = XML_PARSER_CONTENT;
9124 xmlParseElement(ctxt);
9125 ctxt->instate = XML_PARSER_EPILOG;
9126
9127
9128 /*
9129 * The Misc part at the end
9130 */
9131 xmlParseMisc(ctxt);
9132
Daniel Veillard561b7f82002-03-20 21:55:57 +00009133 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009134 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009135 }
9136 ctxt->instate = XML_PARSER_EOF;
9137 }
9138
9139 /*
9140 * SAX: end of the document processing.
9141 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009142 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009143 ctxt->sax->endDocument(ctxt->userData);
9144
Daniel Veillard5997aca2002-03-18 18:36:20 +00009145 /*
9146 * Remove locally kept entity definitions if the tree was not built
9147 */
9148 if ((ctxt->myDoc != NULL) &&
9149 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9150 xmlFreeDoc(ctxt->myDoc);
9151 ctxt->myDoc = NULL;
9152 }
9153
Daniel Veillardc7612992002-02-17 22:47:37 +00009154 if (! ctxt->wellFormed) {
9155 ctxt->valid = 0;
9156 return(-1);
9157 }
Owen Taylor3473f882001-02-23 17:55:21 +00009158 return(0);
9159}
9160
9161/**
9162 * xmlParseExtParsedEnt:
9163 * @ctxt: an XML parser context
9164 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009165 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009166 * An external general parsed entity is well-formed if it matches the
9167 * production labeled extParsedEnt.
9168 *
9169 * [78] extParsedEnt ::= TextDecl? content
9170 *
9171 * Returns 0, -1 in case of error. the parser context is augmented
9172 * as a result of the parsing.
9173 */
9174
9175int
9176xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9177 xmlChar start[4];
9178 xmlCharEncoding enc;
9179
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009180 if ((ctxt == NULL) || (ctxt->input == NULL))
9181 return(-1);
9182
Owen Taylor3473f882001-02-23 17:55:21 +00009183 xmlDefaultSAXHandlerInit();
9184
Daniel Veillard309f81d2003-09-23 09:02:53 +00009185 xmlDetectSAX2(ctxt);
9186
Owen Taylor3473f882001-02-23 17:55:21 +00009187 GROW;
9188
9189 /*
9190 * SAX: beginning of the document processing.
9191 */
9192 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9193 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9194
9195 /*
9196 * Get the 4 first bytes and decode the charset
9197 * if enc != XML_CHAR_ENCODING_NONE
9198 * plug some encoding conversion routines.
9199 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009200 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9201 start[0] = RAW;
9202 start[1] = NXT(1);
9203 start[2] = NXT(2);
9204 start[3] = NXT(3);
9205 enc = xmlDetectCharEncoding(start, 4);
9206 if (enc != XML_CHAR_ENCODING_NONE) {
9207 xmlSwitchEncoding(ctxt, enc);
9208 }
Owen Taylor3473f882001-02-23 17:55:21 +00009209 }
9210
9211
9212 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009213 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009214 }
9215
9216 /*
9217 * Check for the XMLDecl in the Prolog.
9218 */
9219 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009220 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009221
9222 /*
9223 * Note that we will switch encoding on the fly.
9224 */
9225 xmlParseXMLDecl(ctxt);
9226 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9227 /*
9228 * The XML REC instructs us to stop parsing right here
9229 */
9230 return(-1);
9231 }
9232 SKIP_BLANKS;
9233 } else {
9234 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9235 }
9236 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9237 ctxt->sax->startDocument(ctxt->userData);
9238
9239 /*
9240 * Doing validity checking on chunk doesn't make sense
9241 */
9242 ctxt->instate = XML_PARSER_CONTENT;
9243 ctxt->validate = 0;
9244 ctxt->loadsubset = 0;
9245 ctxt->depth = 0;
9246
9247 xmlParseContent(ctxt);
9248
9249 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009250 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009251 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009252 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009253 }
9254
9255 /*
9256 * SAX: end of the document processing.
9257 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009258 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009259 ctxt->sax->endDocument(ctxt->userData);
9260
9261 if (! ctxt->wellFormed) return(-1);
9262 return(0);
9263}
9264
Daniel Veillard73b013f2003-09-30 12:36:01 +00009265#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009266/************************************************************************
9267 * *
9268 * Progressive parsing interfaces *
9269 * *
9270 ************************************************************************/
9271
9272/**
9273 * xmlParseLookupSequence:
9274 * @ctxt: an XML parser context
9275 * @first: the first char to lookup
9276 * @next: the next char to lookup or zero
9277 * @third: the next char to lookup or zero
9278 *
9279 * Try to find if a sequence (first, next, third) or just (first next) or
9280 * (first) is available in the input stream.
9281 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9282 * to avoid rescanning sequences of bytes, it DOES change the state of the
9283 * parser, do not use liberally.
9284 *
9285 * Returns the index to the current parsing point if the full sequence
9286 * is available, -1 otherwise.
9287 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009288static int
Owen Taylor3473f882001-02-23 17:55:21 +00009289xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9290 xmlChar next, xmlChar third) {
9291 int base, len;
9292 xmlParserInputPtr in;
9293 const xmlChar *buf;
9294
9295 in = ctxt->input;
9296 if (in == NULL) return(-1);
9297 base = in->cur - in->base;
9298 if (base < 0) return(-1);
9299 if (ctxt->checkIndex > base)
9300 base = ctxt->checkIndex;
9301 if (in->buf == NULL) {
9302 buf = in->base;
9303 len = in->length;
9304 } else {
9305 buf = in->buf->buffer->content;
9306 len = in->buf->buffer->use;
9307 }
9308 /* take into account the sequence length */
9309 if (third) len -= 2;
9310 else if (next) len --;
9311 for (;base < len;base++) {
9312 if (buf[base] == first) {
9313 if (third != 0) {
9314 if ((buf[base + 1] != next) ||
9315 (buf[base + 2] != third)) continue;
9316 } else if (next != 0) {
9317 if (buf[base + 1] != next) continue;
9318 }
9319 ctxt->checkIndex = 0;
9320#ifdef DEBUG_PUSH
9321 if (next == 0)
9322 xmlGenericError(xmlGenericErrorContext,
9323 "PP: lookup '%c' found at %d\n",
9324 first, base);
9325 else if (third == 0)
9326 xmlGenericError(xmlGenericErrorContext,
9327 "PP: lookup '%c%c' found at %d\n",
9328 first, next, base);
9329 else
9330 xmlGenericError(xmlGenericErrorContext,
9331 "PP: lookup '%c%c%c' found at %d\n",
9332 first, next, third, base);
9333#endif
9334 return(base - (in->cur - in->base));
9335 }
9336 }
9337 ctxt->checkIndex = base;
9338#ifdef DEBUG_PUSH
9339 if (next == 0)
9340 xmlGenericError(xmlGenericErrorContext,
9341 "PP: lookup '%c' failed\n", first);
9342 else if (third == 0)
9343 xmlGenericError(xmlGenericErrorContext,
9344 "PP: lookup '%c%c' failed\n", first, next);
9345 else
9346 xmlGenericError(xmlGenericErrorContext,
9347 "PP: lookup '%c%c%c' failed\n", first, next, third);
9348#endif
9349 return(-1);
9350}
9351
9352/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009353 * xmlParseGetLasts:
9354 * @ctxt: an XML parser context
9355 * @lastlt: pointer to store the last '<' from the input
9356 * @lastgt: pointer to store the last '>' from the input
9357 *
9358 * Lookup the last < and > in the current chunk
9359 */
9360static void
9361xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9362 const xmlChar **lastgt) {
9363 const xmlChar *tmp;
9364
9365 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9366 xmlGenericError(xmlGenericErrorContext,
9367 "Internal error: xmlParseGetLasts\n");
9368 return;
9369 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009370 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009371 tmp = ctxt->input->end;
9372 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009373 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009374 if (tmp < ctxt->input->base) {
9375 *lastlt = NULL;
9376 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009377 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009378 *lastlt = tmp;
9379 tmp++;
9380 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9381 if (*tmp == '\'') {
9382 tmp++;
9383 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9384 if (tmp < ctxt->input->end) tmp++;
9385 } else if (*tmp == '"') {
9386 tmp++;
9387 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9388 if (tmp < ctxt->input->end) tmp++;
9389 } else
9390 tmp++;
9391 }
9392 if (tmp < ctxt->input->end)
9393 *lastgt = tmp;
9394 else {
9395 tmp = *lastlt;
9396 tmp--;
9397 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9398 if (tmp >= ctxt->input->base)
9399 *lastgt = tmp;
9400 else
9401 *lastgt = NULL;
9402 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009403 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009404 } else {
9405 *lastlt = NULL;
9406 *lastgt = NULL;
9407 }
9408}
9409/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009410 * xmlCheckCdataPush:
9411 * @cur: pointer to the bock of characters
9412 * @len: length of the block in bytes
9413 *
9414 * Check that the block of characters is okay as SCdata content [20]
9415 *
9416 * Returns the number of bytes to pass if okay, a negative index where an
9417 * UTF-8 error occured otherwise
9418 */
9419static int
9420xmlCheckCdataPush(const xmlChar *utf, int len) {
9421 int ix;
9422 unsigned char c;
9423 int codepoint;
9424
9425 if ((utf == NULL) || (len <= 0))
9426 return(0);
9427
9428 for (ix = 0; ix < len;) { /* string is 0-terminated */
9429 c = utf[ix];
9430 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9431 if (c >= 0x20)
9432 ix++;
9433 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9434 ix++;
9435 else
9436 return(-ix);
9437 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9438 if (ix + 2 > len) return(ix);
9439 if ((utf[ix+1] & 0xc0 ) != 0x80)
9440 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009441 codepoint = (utf[ix] & 0x1f) << 6;
9442 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009443 if (!xmlIsCharQ(codepoint))
9444 return(-ix);
9445 ix += 2;
9446 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9447 if (ix + 3 > len) return(ix);
9448 if (((utf[ix+1] & 0xc0) != 0x80) ||
9449 ((utf[ix+2] & 0xc0) != 0x80))
9450 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009451 codepoint = (utf[ix] & 0xf) << 12;
9452 codepoint |= (utf[ix+1] & 0x3f) << 6;
9453 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009454 if (!xmlIsCharQ(codepoint))
9455 return(-ix);
9456 ix += 3;
9457 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9458 if (ix + 4 > len) return(ix);
9459 if (((utf[ix+1] & 0xc0) != 0x80) ||
9460 ((utf[ix+2] & 0xc0) != 0x80) ||
9461 ((utf[ix+3] & 0xc0) != 0x80))
9462 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009463 codepoint = (utf[ix] & 0x7) << 18;
9464 codepoint |= (utf[ix+1] & 0x3f) << 12;
9465 codepoint |= (utf[ix+2] & 0x3f) << 6;
9466 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009467 if (!xmlIsCharQ(codepoint))
9468 return(-ix);
9469 ix += 4;
9470 } else /* unknown encoding */
9471 return(-ix);
9472 }
9473 return(ix);
9474}
9475
9476/**
Owen Taylor3473f882001-02-23 17:55:21 +00009477 * xmlParseTryOrFinish:
9478 * @ctxt: an XML parser context
9479 * @terminate: last chunk indicator
9480 *
9481 * Try to progress on parsing
9482 *
9483 * Returns zero if no parsing was possible
9484 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009485static int
Owen Taylor3473f882001-02-23 17:55:21 +00009486xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9487 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009488 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009489 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009490 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009491
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009492 if (ctxt->input == NULL)
9493 return(0);
9494
Owen Taylor3473f882001-02-23 17:55:21 +00009495#ifdef DEBUG_PUSH
9496 switch (ctxt->instate) {
9497 case XML_PARSER_EOF:
9498 xmlGenericError(xmlGenericErrorContext,
9499 "PP: try EOF\n"); break;
9500 case XML_PARSER_START:
9501 xmlGenericError(xmlGenericErrorContext,
9502 "PP: try START\n"); break;
9503 case XML_PARSER_MISC:
9504 xmlGenericError(xmlGenericErrorContext,
9505 "PP: try MISC\n");break;
9506 case XML_PARSER_COMMENT:
9507 xmlGenericError(xmlGenericErrorContext,
9508 "PP: try COMMENT\n");break;
9509 case XML_PARSER_PROLOG:
9510 xmlGenericError(xmlGenericErrorContext,
9511 "PP: try PROLOG\n");break;
9512 case XML_PARSER_START_TAG:
9513 xmlGenericError(xmlGenericErrorContext,
9514 "PP: try START_TAG\n");break;
9515 case XML_PARSER_CONTENT:
9516 xmlGenericError(xmlGenericErrorContext,
9517 "PP: try CONTENT\n");break;
9518 case XML_PARSER_CDATA_SECTION:
9519 xmlGenericError(xmlGenericErrorContext,
9520 "PP: try CDATA_SECTION\n");break;
9521 case XML_PARSER_END_TAG:
9522 xmlGenericError(xmlGenericErrorContext,
9523 "PP: try END_TAG\n");break;
9524 case XML_PARSER_ENTITY_DECL:
9525 xmlGenericError(xmlGenericErrorContext,
9526 "PP: try ENTITY_DECL\n");break;
9527 case XML_PARSER_ENTITY_VALUE:
9528 xmlGenericError(xmlGenericErrorContext,
9529 "PP: try ENTITY_VALUE\n");break;
9530 case XML_PARSER_ATTRIBUTE_VALUE:
9531 xmlGenericError(xmlGenericErrorContext,
9532 "PP: try ATTRIBUTE_VALUE\n");break;
9533 case XML_PARSER_DTD:
9534 xmlGenericError(xmlGenericErrorContext,
9535 "PP: try DTD\n");break;
9536 case XML_PARSER_EPILOG:
9537 xmlGenericError(xmlGenericErrorContext,
9538 "PP: try EPILOG\n");break;
9539 case XML_PARSER_PI:
9540 xmlGenericError(xmlGenericErrorContext,
9541 "PP: try PI\n");break;
9542 case XML_PARSER_IGNORE:
9543 xmlGenericError(xmlGenericErrorContext,
9544 "PP: try IGNORE\n");break;
9545 }
9546#endif
9547
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009548 if ((ctxt->input != NULL) &&
9549 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009550 xmlSHRINK(ctxt);
9551 ctxt->checkIndex = 0;
9552 }
9553 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009554
Daniel Veillarda880b122003-04-21 21:36:41 +00009555 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009556 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009557 return(0);
9558
9559
Owen Taylor3473f882001-02-23 17:55:21 +00009560 /*
9561 * Pop-up of finished entities.
9562 */
9563 while ((RAW == 0) && (ctxt->inputNr > 1))
9564 xmlPopInput(ctxt);
9565
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009566 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009567 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009568 avail = ctxt->input->length -
9569 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009570 else {
9571 /*
9572 * If we are operating on converted input, try to flush
9573 * remainng chars to avoid them stalling in the non-converted
9574 * buffer.
9575 */
9576 if ((ctxt->input->buf->raw != NULL) &&
9577 (ctxt->input->buf->raw->use > 0)) {
9578 int base = ctxt->input->base -
9579 ctxt->input->buf->buffer->content;
9580 int current = ctxt->input->cur - ctxt->input->base;
9581
9582 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9583 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9584 ctxt->input->cur = ctxt->input->base + current;
9585 ctxt->input->end =
9586 &ctxt->input->buf->buffer->content[
9587 ctxt->input->buf->buffer->use];
9588 }
9589 avail = ctxt->input->buf->buffer->use -
9590 (ctxt->input->cur - ctxt->input->base);
9591 }
Owen Taylor3473f882001-02-23 17:55:21 +00009592 if (avail < 1)
9593 goto done;
9594 switch (ctxt->instate) {
9595 case XML_PARSER_EOF:
9596 /*
9597 * Document parsing is done !
9598 */
9599 goto done;
9600 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009601 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9602 xmlChar start[4];
9603 xmlCharEncoding enc;
9604
9605 /*
9606 * Very first chars read from the document flow.
9607 */
9608 if (avail < 4)
9609 goto done;
9610
9611 /*
9612 * Get the 4 first bytes and decode the charset
9613 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009614 * plug some encoding conversion routines,
9615 * else xmlSwitchEncoding will set to (default)
9616 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009617 */
9618 start[0] = RAW;
9619 start[1] = NXT(1);
9620 start[2] = NXT(2);
9621 start[3] = NXT(3);
9622 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009623 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009624 break;
9625 }
Owen Taylor3473f882001-02-23 17:55:21 +00009626
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009627 if (avail < 2)
9628 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009629 cur = ctxt->input->cur[0];
9630 next = ctxt->input->cur[1];
9631 if (cur == 0) {
9632 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9633 ctxt->sax->setDocumentLocator(ctxt->userData,
9634 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009635 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009636 ctxt->instate = XML_PARSER_EOF;
9637#ifdef DEBUG_PUSH
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: entering EOF\n");
9640#endif
9641 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9642 ctxt->sax->endDocument(ctxt->userData);
9643 goto done;
9644 }
9645 if ((cur == '<') && (next == '?')) {
9646 /* PI or XML decl */
9647 if (avail < 5) return(ret);
9648 if ((!terminate) &&
9649 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9650 return(ret);
9651 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9652 ctxt->sax->setDocumentLocator(ctxt->userData,
9653 &xmlDefaultSAXLocator);
9654 if ((ctxt->input->cur[2] == 'x') &&
9655 (ctxt->input->cur[3] == 'm') &&
9656 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009657 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009658 ret += 5;
9659#ifdef DEBUG_PUSH
9660 xmlGenericError(xmlGenericErrorContext,
9661 "PP: Parsing XML Decl\n");
9662#endif
9663 xmlParseXMLDecl(ctxt);
9664 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9665 /*
9666 * The XML REC instructs us to stop parsing right
9667 * here
9668 */
9669 ctxt->instate = XML_PARSER_EOF;
9670 return(0);
9671 }
9672 ctxt->standalone = ctxt->input->standalone;
9673 if ((ctxt->encoding == NULL) &&
9674 (ctxt->input->encoding != NULL))
9675 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9676 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9677 (!ctxt->disableSAX))
9678 ctxt->sax->startDocument(ctxt->userData);
9679 ctxt->instate = XML_PARSER_MISC;
9680#ifdef DEBUG_PUSH
9681 xmlGenericError(xmlGenericErrorContext,
9682 "PP: entering MISC\n");
9683#endif
9684 } else {
9685 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9686 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9687 (!ctxt->disableSAX))
9688 ctxt->sax->startDocument(ctxt->userData);
9689 ctxt->instate = XML_PARSER_MISC;
9690#ifdef DEBUG_PUSH
9691 xmlGenericError(xmlGenericErrorContext,
9692 "PP: entering MISC\n");
9693#endif
9694 }
9695 } else {
9696 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9697 ctxt->sax->setDocumentLocator(ctxt->userData,
9698 &xmlDefaultSAXLocator);
9699 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009700 if (ctxt->version == NULL) {
9701 xmlErrMemory(ctxt, NULL);
9702 break;
9703 }
Owen Taylor3473f882001-02-23 17:55:21 +00009704 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9705 (!ctxt->disableSAX))
9706 ctxt->sax->startDocument(ctxt->userData);
9707 ctxt->instate = XML_PARSER_MISC;
9708#ifdef DEBUG_PUSH
9709 xmlGenericError(xmlGenericErrorContext,
9710 "PP: entering MISC\n");
9711#endif
9712 }
9713 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009714 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009715 const xmlChar *name;
9716 const xmlChar *prefix;
9717 const xmlChar *URI;
9718 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009719
9720 if ((avail < 2) && (ctxt->inputNr == 1))
9721 goto done;
9722 cur = ctxt->input->cur[0];
9723 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009725 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009726 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9727 ctxt->sax->endDocument(ctxt->userData);
9728 goto done;
9729 }
9730 if (!terminate) {
9731 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009732 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009733 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009734 goto done;
9735 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9736 goto done;
9737 }
9738 }
9739 if (ctxt->spaceNr == 0)
9740 spacePush(ctxt, -1);
9741 else
9742 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009743#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009744 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009745#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009746 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009747#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009748 else
9749 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009750#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009751 if (name == NULL) {
9752 spacePop(ctxt);
9753 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009754 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9755 ctxt->sax->endDocument(ctxt->userData);
9756 goto done;
9757 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009758#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009759 /*
9760 * [ VC: Root Element Type ]
9761 * The Name in the document type declaration must match
9762 * the element type of the root element.
9763 */
9764 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9765 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9766 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009767#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009768
9769 /*
9770 * Check for an Empty Element.
9771 */
9772 if ((RAW == '/') && (NXT(1) == '>')) {
9773 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009774
9775 if (ctxt->sax2) {
9776 if ((ctxt->sax != NULL) &&
9777 (ctxt->sax->endElementNs != NULL) &&
9778 (!ctxt->disableSAX))
9779 ctxt->sax->endElementNs(ctxt->userData, name,
9780 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009781 if (ctxt->nsNr - nsNr > 0)
9782 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009783#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009784 } else {
9785 if ((ctxt->sax != NULL) &&
9786 (ctxt->sax->endElement != NULL) &&
9787 (!ctxt->disableSAX))
9788 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009789#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009790 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009791 spacePop(ctxt);
9792 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009793 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009794 } else {
9795 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009796 }
9797 break;
9798 }
9799 if (RAW == '>') {
9800 NEXT;
9801 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009802 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009803 "Couldn't find end of Start Tag %s\n",
9804 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009805 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009807 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009808 if (ctxt->sax2)
9809 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009810#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009811 else
9812 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009813#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009814
Daniel Veillarda880b122003-04-21 21:36:41 +00009815 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009816 break;
9817 }
9818 case XML_PARSER_CONTENT: {
9819 const xmlChar *test;
9820 unsigned int cons;
9821 if ((avail < 2) && (ctxt->inputNr == 1))
9822 goto done;
9823 cur = ctxt->input->cur[0];
9824 next = ctxt->input->cur[1];
9825
9826 test = CUR_PTR;
9827 cons = ctxt->input->consumed;
9828 if ((cur == '<') && (next == '/')) {
9829 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009830 break;
9831 } else if ((cur == '<') && (next == '?')) {
9832 if ((!terminate) &&
9833 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9834 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009835 xmlParsePI(ctxt);
9836 } else if ((cur == '<') && (next != '!')) {
9837 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 break;
9839 } else if ((cur == '<') && (next == '!') &&
9840 (ctxt->input->cur[2] == '-') &&
9841 (ctxt->input->cur[3] == '-')) {
9842 if ((!terminate) &&
9843 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9844 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009845 xmlParseComment(ctxt);
9846 ctxt->instate = XML_PARSER_CONTENT;
9847 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9848 (ctxt->input->cur[2] == '[') &&
9849 (ctxt->input->cur[3] == 'C') &&
9850 (ctxt->input->cur[4] == 'D') &&
9851 (ctxt->input->cur[5] == 'A') &&
9852 (ctxt->input->cur[6] == 'T') &&
9853 (ctxt->input->cur[7] == 'A') &&
9854 (ctxt->input->cur[8] == '[')) {
9855 SKIP(9);
9856 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009857 break;
9858 } else if ((cur == '<') && (next == '!') &&
9859 (avail < 9)) {
9860 goto done;
9861 } else if (cur == '&') {
9862 if ((!terminate) &&
9863 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9864 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009865 xmlParseReference(ctxt);
9866 } else {
9867 /* TODO Avoid the extra copy, handle directly !!! */
9868 /*
9869 * Goal of the following test is:
9870 * - minimize calls to the SAX 'character' callback
9871 * when they are mergeable
9872 * - handle an problem for isBlank when we only parse
9873 * a sequence of blank chars and the next one is
9874 * not available to check against '<' presence.
9875 * - tries to homogenize the differences in SAX
9876 * callbacks between the push and pull versions
9877 * of the parser.
9878 */
9879 if ((ctxt->inputNr == 1) &&
9880 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9881 if (!terminate) {
9882 if (ctxt->progressive) {
9883 if ((lastlt == NULL) ||
9884 (ctxt->input->cur > lastlt))
9885 goto done;
9886 } else if (xmlParseLookupSequence(ctxt,
9887 '<', 0, 0) < 0) {
9888 goto done;
9889 }
9890 }
9891 }
9892 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009893 xmlParseCharData(ctxt, 0);
9894 }
9895 /*
9896 * Pop-up of finished entities.
9897 */
9898 while ((RAW == 0) && (ctxt->inputNr > 1))
9899 xmlPopInput(ctxt);
9900 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009901 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9902 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009903 ctxt->instate = XML_PARSER_EOF;
9904 break;
9905 }
9906 break;
9907 }
9908 case XML_PARSER_END_TAG:
9909 if (avail < 2)
9910 goto done;
9911 if (!terminate) {
9912 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009913 /* > can be found unescaped in attribute values */
9914 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009915 goto done;
9916 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9917 goto done;
9918 }
9919 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009920 if (ctxt->sax2) {
9921 xmlParseEndTag2(ctxt,
9922 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9923 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009924 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009925 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009926 }
9927#ifdef LIBXML_SAX1_ENABLED
9928 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009929 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009930#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009931 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009932 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009933 } else {
9934 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009935 }
9936 break;
9937 case XML_PARSER_CDATA_SECTION: {
9938 /*
9939 * The Push mode need to have the SAX callback for
9940 * cdataBlock merge back contiguous callbacks.
9941 */
9942 int base;
9943
9944 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9945 if (base < 0) {
9946 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009947 int tmp;
9948
9949 tmp = xmlCheckCdataPush(ctxt->input->cur,
9950 XML_PARSER_BIG_BUFFER_SIZE);
9951 if (tmp < 0) {
9952 tmp = -tmp;
9953 ctxt->input->cur += tmp;
9954 goto encoding_error;
9955 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009956 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9957 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009958 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009959 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009960 else if (ctxt->sax->characters != NULL)
9961 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009962 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009963 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009964 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009965 ctxt->checkIndex = 0;
9966 }
9967 goto done;
9968 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009969 int tmp;
9970
9971 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9972 if ((tmp < 0) || (tmp != base)) {
9973 tmp = -tmp;
9974 ctxt->input->cur += tmp;
9975 goto encoding_error;
9976 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009977 if ((ctxt->sax != NULL) && (base > 0) &&
9978 (!ctxt->disableSAX)) {
9979 if (ctxt->sax->cdataBlock != NULL)
9980 ctxt->sax->cdataBlock(ctxt->userData,
9981 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009982 else if (ctxt->sax->characters != NULL)
9983 ctxt->sax->characters(ctxt->userData,
9984 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009985 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009986 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009987 ctxt->checkIndex = 0;
9988 ctxt->instate = XML_PARSER_CONTENT;
9989#ifdef DEBUG_PUSH
9990 xmlGenericError(xmlGenericErrorContext,
9991 "PP: entering CONTENT\n");
9992#endif
9993 }
9994 break;
9995 }
Owen Taylor3473f882001-02-23 17:55:21 +00009996 case XML_PARSER_MISC:
9997 SKIP_BLANKS;
9998 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009999 avail = ctxt->input->length -
10000 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010001 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010002 avail = ctxt->input->buf->buffer->use -
10003 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010004 if (avail < 2)
10005 goto done;
10006 cur = ctxt->input->cur[0];
10007 next = ctxt->input->cur[1];
10008 if ((cur == '<') && (next == '?')) {
10009 if ((!terminate) &&
10010 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10011 goto done;
10012#ifdef DEBUG_PUSH
10013 xmlGenericError(xmlGenericErrorContext,
10014 "PP: Parsing PI\n");
10015#endif
10016 xmlParsePI(ctxt);
10017 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010018 (ctxt->input->cur[2] == '-') &&
10019 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010020 if ((!terminate) &&
10021 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10022 goto done;
10023#ifdef DEBUG_PUSH
10024 xmlGenericError(xmlGenericErrorContext,
10025 "PP: Parsing Comment\n");
10026#endif
10027 xmlParseComment(ctxt);
10028 ctxt->instate = XML_PARSER_MISC;
10029 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010030 (ctxt->input->cur[2] == 'D') &&
10031 (ctxt->input->cur[3] == 'O') &&
10032 (ctxt->input->cur[4] == 'C') &&
10033 (ctxt->input->cur[5] == 'T') &&
10034 (ctxt->input->cur[6] == 'Y') &&
10035 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010036 (ctxt->input->cur[8] == 'E')) {
10037 if ((!terminate) &&
10038 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10039 goto done;
10040#ifdef DEBUG_PUSH
10041 xmlGenericError(xmlGenericErrorContext,
10042 "PP: Parsing internal subset\n");
10043#endif
10044 ctxt->inSubset = 1;
10045 xmlParseDocTypeDecl(ctxt);
10046 if (RAW == '[') {
10047 ctxt->instate = XML_PARSER_DTD;
10048#ifdef DEBUG_PUSH
10049 xmlGenericError(xmlGenericErrorContext,
10050 "PP: entering DTD\n");
10051#endif
10052 } else {
10053 /*
10054 * Create and update the external subset.
10055 */
10056 ctxt->inSubset = 2;
10057 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10058 (ctxt->sax->externalSubset != NULL))
10059 ctxt->sax->externalSubset(ctxt->userData,
10060 ctxt->intSubName, ctxt->extSubSystem,
10061 ctxt->extSubURI);
10062 ctxt->inSubset = 0;
10063 ctxt->instate = XML_PARSER_PROLOG;
10064#ifdef DEBUG_PUSH
10065 xmlGenericError(xmlGenericErrorContext,
10066 "PP: entering PROLOG\n");
10067#endif
10068 }
10069 } else if ((cur == '<') && (next == '!') &&
10070 (avail < 9)) {
10071 goto done;
10072 } else {
10073 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010074 ctxt->progressive = 1;
10075 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010076#ifdef DEBUG_PUSH
10077 xmlGenericError(xmlGenericErrorContext,
10078 "PP: entering START_TAG\n");
10079#endif
10080 }
10081 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010082 case XML_PARSER_PROLOG:
10083 SKIP_BLANKS;
10084 if (ctxt->input->buf == NULL)
10085 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10086 else
10087 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10088 if (avail < 2)
10089 goto done;
10090 cur = ctxt->input->cur[0];
10091 next = ctxt->input->cur[1];
10092 if ((cur == '<') && (next == '?')) {
10093 if ((!terminate) &&
10094 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10095 goto done;
10096#ifdef DEBUG_PUSH
10097 xmlGenericError(xmlGenericErrorContext,
10098 "PP: Parsing PI\n");
10099#endif
10100 xmlParsePI(ctxt);
10101 } else if ((cur == '<') && (next == '!') &&
10102 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10103 if ((!terminate) &&
10104 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10105 goto done;
10106#ifdef DEBUG_PUSH
10107 xmlGenericError(xmlGenericErrorContext,
10108 "PP: Parsing Comment\n");
10109#endif
10110 xmlParseComment(ctxt);
10111 ctxt->instate = XML_PARSER_PROLOG;
10112 } else if ((cur == '<') && (next == '!') &&
10113 (avail < 4)) {
10114 goto done;
10115 } else {
10116 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010117 if (ctxt->progressive == 0)
10118 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010119 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010120#ifdef DEBUG_PUSH
10121 xmlGenericError(xmlGenericErrorContext,
10122 "PP: entering START_TAG\n");
10123#endif
10124 }
10125 break;
10126 case XML_PARSER_EPILOG:
10127 SKIP_BLANKS;
10128 if (ctxt->input->buf == NULL)
10129 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10130 else
10131 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10132 if (avail < 2)
10133 goto done;
10134 cur = ctxt->input->cur[0];
10135 next = ctxt->input->cur[1];
10136 if ((cur == '<') && (next == '?')) {
10137 if ((!terminate) &&
10138 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10139 goto done;
10140#ifdef DEBUG_PUSH
10141 xmlGenericError(xmlGenericErrorContext,
10142 "PP: Parsing PI\n");
10143#endif
10144 xmlParsePI(ctxt);
10145 ctxt->instate = XML_PARSER_EPILOG;
10146 } else if ((cur == '<') && (next == '!') &&
10147 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10148 if ((!terminate) &&
10149 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10150 goto done;
10151#ifdef DEBUG_PUSH
10152 xmlGenericError(xmlGenericErrorContext,
10153 "PP: Parsing Comment\n");
10154#endif
10155 xmlParseComment(ctxt);
10156 ctxt->instate = XML_PARSER_EPILOG;
10157 } else if ((cur == '<') && (next == '!') &&
10158 (avail < 4)) {
10159 goto done;
10160 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010161 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010162 ctxt->instate = XML_PARSER_EOF;
10163#ifdef DEBUG_PUSH
10164 xmlGenericError(xmlGenericErrorContext,
10165 "PP: entering EOF\n");
10166#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010167 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010168 ctxt->sax->endDocument(ctxt->userData);
10169 goto done;
10170 }
10171 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010172 case XML_PARSER_DTD: {
10173 /*
10174 * Sorry but progressive parsing of the internal subset
10175 * is not expected to be supported. We first check that
10176 * the full content of the internal subset is available and
10177 * the parsing is launched only at that point.
10178 * Internal subset ends up with "']' S? '>'" in an unescaped
10179 * section and not in a ']]>' sequence which are conditional
10180 * sections (whoever argued to keep that crap in XML deserve
10181 * a place in hell !).
10182 */
10183 int base, i;
10184 xmlChar *buf;
10185 xmlChar quote = 0;
10186
10187 base = ctxt->input->cur - ctxt->input->base;
10188 if (base < 0) return(0);
10189 if (ctxt->checkIndex > base)
10190 base = ctxt->checkIndex;
10191 buf = ctxt->input->buf->buffer->content;
10192 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10193 base++) {
10194 if (quote != 0) {
10195 if (buf[base] == quote)
10196 quote = 0;
10197 continue;
10198 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010199 if ((quote == 0) && (buf[base] == '<')) {
10200 int found = 0;
10201 /* special handling of comments */
10202 if (((unsigned int) base + 4 <
10203 ctxt->input->buf->buffer->use) &&
10204 (buf[base + 1] == '!') &&
10205 (buf[base + 2] == '-') &&
10206 (buf[base + 3] == '-')) {
10207 for (;(unsigned int) base + 3 <
10208 ctxt->input->buf->buffer->use; base++) {
10209 if ((buf[base] == '-') &&
10210 (buf[base + 1] == '-') &&
10211 (buf[base + 2] == '>')) {
10212 found = 1;
10213 base += 2;
10214 break;
10215 }
10216 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010217 if (!found) {
10218#if 0
10219 fprintf(stderr, "unfinished comment\n");
10220#endif
10221 break; /* for */
10222 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010223 continue;
10224 }
10225 }
Owen Taylor3473f882001-02-23 17:55:21 +000010226 if (buf[base] == '"') {
10227 quote = '"';
10228 continue;
10229 }
10230 if (buf[base] == '\'') {
10231 quote = '\'';
10232 continue;
10233 }
10234 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010235#if 0
10236 fprintf(stderr, "%c%c%c%c: ", buf[base],
10237 buf[base + 1], buf[base + 2], buf[base + 3]);
10238#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010239 if ((unsigned int) base +1 >=
10240 ctxt->input->buf->buffer->use)
10241 break;
10242 if (buf[base + 1] == ']') {
10243 /* conditional crap, skip both ']' ! */
10244 base++;
10245 continue;
10246 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010247 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010248 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10249 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010250 if (buf[base + i] == '>') {
10251#if 0
10252 fprintf(stderr, "found\n");
10253#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010254 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010255 }
10256 if (!IS_BLANK_CH(buf[base + i])) {
10257#if 0
10258 fprintf(stderr, "not found\n");
10259#endif
10260 goto not_end_of_int_subset;
10261 }
Owen Taylor3473f882001-02-23 17:55:21 +000010262 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010263#if 0
10264 fprintf(stderr, "end of stream\n");
10265#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010266 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010267
Owen Taylor3473f882001-02-23 17:55:21 +000010268 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010269not_end_of_int_subset:
10270 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010271 }
10272 /*
10273 * We didn't found the end of the Internal subset
10274 */
Owen Taylor3473f882001-02-23 17:55:21 +000010275#ifdef DEBUG_PUSH
10276 if (next == 0)
10277 xmlGenericError(xmlGenericErrorContext,
10278 "PP: lookup of int subset end filed\n");
10279#endif
10280 goto done;
10281
10282found_end_int_subset:
10283 xmlParseInternalSubset(ctxt);
10284 ctxt->inSubset = 2;
10285 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10286 (ctxt->sax->externalSubset != NULL))
10287 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10288 ctxt->extSubSystem, ctxt->extSubURI);
10289 ctxt->inSubset = 0;
10290 ctxt->instate = XML_PARSER_PROLOG;
10291 ctxt->checkIndex = 0;
10292#ifdef DEBUG_PUSH
10293 xmlGenericError(xmlGenericErrorContext,
10294 "PP: entering PROLOG\n");
10295#endif
10296 break;
10297 }
10298 case XML_PARSER_COMMENT:
10299 xmlGenericError(xmlGenericErrorContext,
10300 "PP: internal error, state == COMMENT\n");
10301 ctxt->instate = XML_PARSER_CONTENT;
10302#ifdef DEBUG_PUSH
10303 xmlGenericError(xmlGenericErrorContext,
10304 "PP: entering CONTENT\n");
10305#endif
10306 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010307 case XML_PARSER_IGNORE:
10308 xmlGenericError(xmlGenericErrorContext,
10309 "PP: internal error, state == IGNORE");
10310 ctxt->instate = XML_PARSER_DTD;
10311#ifdef DEBUG_PUSH
10312 xmlGenericError(xmlGenericErrorContext,
10313 "PP: entering DTD\n");
10314#endif
10315 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010316 case XML_PARSER_PI:
10317 xmlGenericError(xmlGenericErrorContext,
10318 "PP: internal error, state == PI\n");
10319 ctxt->instate = XML_PARSER_CONTENT;
10320#ifdef DEBUG_PUSH
10321 xmlGenericError(xmlGenericErrorContext,
10322 "PP: entering CONTENT\n");
10323#endif
10324 break;
10325 case XML_PARSER_ENTITY_DECL:
10326 xmlGenericError(xmlGenericErrorContext,
10327 "PP: internal error, state == ENTITY_DECL\n");
10328 ctxt->instate = XML_PARSER_DTD;
10329#ifdef DEBUG_PUSH
10330 xmlGenericError(xmlGenericErrorContext,
10331 "PP: entering DTD\n");
10332#endif
10333 break;
10334 case XML_PARSER_ENTITY_VALUE:
10335 xmlGenericError(xmlGenericErrorContext,
10336 "PP: internal error, state == ENTITY_VALUE\n");
10337 ctxt->instate = XML_PARSER_CONTENT;
10338#ifdef DEBUG_PUSH
10339 xmlGenericError(xmlGenericErrorContext,
10340 "PP: entering DTD\n");
10341#endif
10342 break;
10343 case XML_PARSER_ATTRIBUTE_VALUE:
10344 xmlGenericError(xmlGenericErrorContext,
10345 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10346 ctxt->instate = XML_PARSER_START_TAG;
10347#ifdef DEBUG_PUSH
10348 xmlGenericError(xmlGenericErrorContext,
10349 "PP: entering START_TAG\n");
10350#endif
10351 break;
10352 case XML_PARSER_SYSTEM_LITERAL:
10353 xmlGenericError(xmlGenericErrorContext,
10354 "PP: internal error, state == SYSTEM_LITERAL\n");
10355 ctxt->instate = XML_PARSER_START_TAG;
10356#ifdef DEBUG_PUSH
10357 xmlGenericError(xmlGenericErrorContext,
10358 "PP: entering START_TAG\n");
10359#endif
10360 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010361 case XML_PARSER_PUBLIC_LITERAL:
10362 xmlGenericError(xmlGenericErrorContext,
10363 "PP: internal error, state == PUBLIC_LITERAL\n");
10364 ctxt->instate = XML_PARSER_START_TAG;
10365#ifdef DEBUG_PUSH
10366 xmlGenericError(xmlGenericErrorContext,
10367 "PP: entering START_TAG\n");
10368#endif
10369 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010370 }
10371 }
10372done:
10373#ifdef DEBUG_PUSH
10374 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10375#endif
10376 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010377encoding_error:
10378 {
10379 char buffer[150];
10380
10381 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10382 ctxt->input->cur[0], ctxt->input->cur[1],
10383 ctxt->input->cur[2], ctxt->input->cur[3]);
10384 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10385 "Input is not proper UTF-8, indicate encoding !\n%s",
10386 BAD_CAST buffer, NULL);
10387 }
10388 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010389}
10390
10391/**
Owen Taylor3473f882001-02-23 17:55:21 +000010392 * xmlParseChunk:
10393 * @ctxt: an XML parser context
10394 * @chunk: an char array
10395 * @size: the size in byte of the chunk
10396 * @terminate: last chunk indicator
10397 *
10398 * Parse a Chunk of memory
10399 *
10400 * Returns zero if no error, the xmlParserErrors otherwise.
10401 */
10402int
10403xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10404 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010405 if (ctxt == NULL)
10406 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010407 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010408 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010409 if (ctxt->instate == XML_PARSER_START)
10410 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010411 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10412 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10413 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10414 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010415 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010416
William M. Bracka3215c72004-07-31 16:24:01 +000010417 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10418 if (res < 0) {
10419 ctxt->errNo = XML_PARSER_EOF;
10420 ctxt->disableSAX = 1;
10421 return (XML_PARSER_EOF);
10422 }
Owen Taylor3473f882001-02-23 17:55:21 +000010423 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10424 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010425 ctxt->input->end =
10426 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010427#ifdef DEBUG_PUSH
10428 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10429#endif
10430
Owen Taylor3473f882001-02-23 17:55:21 +000010431 } else if (ctxt->instate != XML_PARSER_EOF) {
10432 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10433 xmlParserInputBufferPtr in = ctxt->input->buf;
10434 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10435 (in->raw != NULL)) {
10436 int nbchars;
10437
10438 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10439 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010440 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010441 xmlGenericError(xmlGenericErrorContext,
10442 "xmlParseChunk: encoder error\n");
10443 return(XML_ERR_INVALID_ENCODING);
10444 }
10445 }
10446 }
10447 }
10448 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010449 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010450 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010451 if (terminate) {
10452 /*
10453 * Check for termination
10454 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010455 int avail = 0;
10456
10457 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010458 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010459 avail = ctxt->input->length -
10460 (ctxt->input->cur - ctxt->input->base);
10461 else
10462 avail = ctxt->input->buf->buffer->use -
10463 (ctxt->input->cur - ctxt->input->base);
10464 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010465
Owen Taylor3473f882001-02-23 17:55:21 +000010466 if ((ctxt->instate != XML_PARSER_EOF) &&
10467 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010468 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010469 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010470 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010471 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010472 }
Owen Taylor3473f882001-02-23 17:55:21 +000010473 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010474 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010475 ctxt->sax->endDocument(ctxt->userData);
10476 }
10477 ctxt->instate = XML_PARSER_EOF;
10478 }
10479 return((xmlParserErrors) ctxt->errNo);
10480}
10481
10482/************************************************************************
10483 * *
10484 * I/O front end functions to the parser *
10485 * *
10486 ************************************************************************/
10487
10488/**
Owen Taylor3473f882001-02-23 17:55:21 +000010489 * xmlCreatePushParserCtxt:
10490 * @sax: a SAX handler
10491 * @user_data: The user data returned on SAX callbacks
10492 * @chunk: a pointer to an array of chars
10493 * @size: number of chars in the array
10494 * @filename: an optional file name or URI
10495 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010496 * Create a parser context for using the XML parser in push mode.
10497 * If @buffer and @size are non-NULL, the data is used to detect
10498 * the encoding. The remaining characters will be parsed so they
10499 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010500 * To allow content encoding detection, @size should be >= 4
10501 * The value of @filename is used for fetching external entities
10502 * and error/warning reports.
10503 *
10504 * Returns the new parser context or NULL
10505 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010506
Owen Taylor3473f882001-02-23 17:55:21 +000010507xmlParserCtxtPtr
10508xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10509 const char *chunk, int size, const char *filename) {
10510 xmlParserCtxtPtr ctxt;
10511 xmlParserInputPtr inputStream;
10512 xmlParserInputBufferPtr buf;
10513 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10514
10515 /*
10516 * plug some encoding conversion routines
10517 */
10518 if ((chunk != NULL) && (size >= 4))
10519 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10520
10521 buf = xmlAllocParserInputBuffer(enc);
10522 if (buf == NULL) return(NULL);
10523
10524 ctxt = xmlNewParserCtxt();
10525 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010526 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010527 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010528 return(NULL);
10529 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010530 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010531 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10532 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010534 xmlFreeParserInputBuffer(buf);
10535 xmlFreeParserCtxt(ctxt);
10536 return(NULL);
10537 }
Owen Taylor3473f882001-02-23 17:55:21 +000010538 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010539#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010540 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010541#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010542 xmlFree(ctxt->sax);
10543 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10544 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010545 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010546 xmlFreeParserInputBuffer(buf);
10547 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010548 return(NULL);
10549 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010550 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10551 if (sax->initialized == XML_SAX2_MAGIC)
10552 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10553 else
10554 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010555 if (user_data != NULL)
10556 ctxt->userData = user_data;
10557 }
10558 if (filename == NULL) {
10559 ctxt->directory = NULL;
10560 } else {
10561 ctxt->directory = xmlParserGetDirectory(filename);
10562 }
10563
10564 inputStream = xmlNewInputStream(ctxt);
10565 if (inputStream == NULL) {
10566 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010567 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010568 return(NULL);
10569 }
10570
10571 if (filename == NULL)
10572 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010573 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010574 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010575 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010576 if (inputStream->filename == NULL) {
10577 xmlFreeParserCtxt(ctxt);
10578 xmlFreeParserInputBuffer(buf);
10579 return(NULL);
10580 }
10581 }
Owen Taylor3473f882001-02-23 17:55:21 +000010582 inputStream->buf = buf;
10583 inputStream->base = inputStream->buf->buffer->content;
10584 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010585 inputStream->end =
10586 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010587
10588 inputPush(ctxt, inputStream);
10589
William M. Brack3a1cd212005-02-11 14:35:54 +000010590 /*
10591 * If the caller didn't provide an initial 'chunk' for determining
10592 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10593 * that it can be automatically determined later
10594 */
10595 if ((size == 0) || (chunk == NULL)) {
10596 ctxt->charset = XML_CHAR_ENCODING_NONE;
10597 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010598 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10599 int cur = ctxt->input->cur - ctxt->input->base;
10600
Owen Taylor3473f882001-02-23 17:55:21 +000010601 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010602
10603 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10604 ctxt->input->cur = ctxt->input->base + cur;
10605 ctxt->input->end =
10606 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010607#ifdef DEBUG_PUSH
10608 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10609#endif
10610 }
10611
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010612 if (enc != XML_CHAR_ENCODING_NONE) {
10613 xmlSwitchEncoding(ctxt, enc);
10614 }
10615
Owen Taylor3473f882001-02-23 17:55:21 +000010616 return(ctxt);
10617}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010618#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010619
10620/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010621 * xmlStopParser:
10622 * @ctxt: an XML parser context
10623 *
10624 * Blocks further parser processing
10625 */
10626void
10627xmlStopParser(xmlParserCtxtPtr ctxt) {
10628 if (ctxt == NULL)
10629 return;
10630 ctxt->instate = XML_PARSER_EOF;
10631 ctxt->disableSAX = 1;
10632 if (ctxt->input != NULL) {
10633 ctxt->input->cur = BAD_CAST"";
10634 ctxt->input->base = ctxt->input->cur;
10635 }
10636}
10637
10638/**
Owen Taylor3473f882001-02-23 17:55:21 +000010639 * xmlCreateIOParserCtxt:
10640 * @sax: a SAX handler
10641 * @user_data: The user data returned on SAX callbacks
10642 * @ioread: an I/O read function
10643 * @ioclose: an I/O close function
10644 * @ioctx: an I/O handler
10645 * @enc: the charset encoding if known
10646 *
10647 * Create a parser context for using the XML parser with an existing
10648 * I/O stream
10649 *
10650 * Returns the new parser context or NULL
10651 */
10652xmlParserCtxtPtr
10653xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10654 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10655 void *ioctx, xmlCharEncoding enc) {
10656 xmlParserCtxtPtr ctxt;
10657 xmlParserInputPtr inputStream;
10658 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010659
10660 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010661
10662 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10663 if (buf == NULL) return(NULL);
10664
10665 ctxt = xmlNewParserCtxt();
10666 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010667 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010668 return(NULL);
10669 }
10670 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010671#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010672 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010673#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010674 xmlFree(ctxt->sax);
10675 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10676 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010677 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010678 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010679 return(NULL);
10680 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010681 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10682 if (sax->initialized == XML_SAX2_MAGIC)
10683 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10684 else
10685 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010686 if (user_data != NULL)
10687 ctxt->userData = user_data;
10688 }
10689
10690 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10691 if (inputStream == NULL) {
10692 xmlFreeParserCtxt(ctxt);
10693 return(NULL);
10694 }
10695 inputPush(ctxt, inputStream);
10696
10697 return(ctxt);
10698}
10699
Daniel Veillard4432df22003-09-28 18:58:27 +000010700#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010701/************************************************************************
10702 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010703 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010704 * *
10705 ************************************************************************/
10706
10707/**
10708 * xmlIOParseDTD:
10709 * @sax: the SAX handler block or NULL
10710 * @input: an Input Buffer
10711 * @enc: the charset encoding if known
10712 *
10713 * Load and parse a DTD
10714 *
10715 * Returns the resulting xmlDtdPtr or NULL in case of error.
10716 * @input will be freed at parsing end.
10717 */
10718
10719xmlDtdPtr
10720xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10721 xmlCharEncoding enc) {
10722 xmlDtdPtr ret = NULL;
10723 xmlParserCtxtPtr ctxt;
10724 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010725 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010726
10727 if (input == NULL)
10728 return(NULL);
10729
10730 ctxt = xmlNewParserCtxt();
10731 if (ctxt == NULL) {
10732 return(NULL);
10733 }
10734
10735 /*
10736 * Set-up the SAX context
10737 */
10738 if (sax != NULL) {
10739 if (ctxt->sax != NULL)
10740 xmlFree(ctxt->sax);
10741 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010742 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010743 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010744 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010745
10746 /*
10747 * generate a parser input from the I/O handler
10748 */
10749
Daniel Veillard43caefb2003-12-07 19:32:22 +000010750 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010751 if (pinput == NULL) {
10752 if (sax != NULL) ctxt->sax = NULL;
10753 xmlFreeParserCtxt(ctxt);
10754 return(NULL);
10755 }
10756
10757 /*
10758 * plug some encoding conversion routines here.
10759 */
10760 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010761 if (enc != XML_CHAR_ENCODING_NONE) {
10762 xmlSwitchEncoding(ctxt, enc);
10763 }
Owen Taylor3473f882001-02-23 17:55:21 +000010764
10765 pinput->filename = NULL;
10766 pinput->line = 1;
10767 pinput->col = 1;
10768 pinput->base = ctxt->input->cur;
10769 pinput->cur = ctxt->input->cur;
10770 pinput->free = NULL;
10771
10772 /*
10773 * let's parse that entity knowing it's an external subset.
10774 */
10775 ctxt->inSubset = 2;
10776 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10777 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10778 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010779
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010780 if ((enc == XML_CHAR_ENCODING_NONE) &&
10781 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010782 /*
10783 * Get the 4 first bytes and decode the charset
10784 * if enc != XML_CHAR_ENCODING_NONE
10785 * plug some encoding conversion routines.
10786 */
10787 start[0] = RAW;
10788 start[1] = NXT(1);
10789 start[2] = NXT(2);
10790 start[3] = NXT(3);
10791 enc = xmlDetectCharEncoding(start, 4);
10792 if (enc != XML_CHAR_ENCODING_NONE) {
10793 xmlSwitchEncoding(ctxt, enc);
10794 }
10795 }
10796
Owen Taylor3473f882001-02-23 17:55:21 +000010797 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10798
10799 if (ctxt->myDoc != NULL) {
10800 if (ctxt->wellFormed) {
10801 ret = ctxt->myDoc->extSubset;
10802 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010803 if (ret != NULL) {
10804 xmlNodePtr tmp;
10805
10806 ret->doc = NULL;
10807 tmp = ret->children;
10808 while (tmp != NULL) {
10809 tmp->doc = NULL;
10810 tmp = tmp->next;
10811 }
10812 }
Owen Taylor3473f882001-02-23 17:55:21 +000010813 } else {
10814 ret = NULL;
10815 }
10816 xmlFreeDoc(ctxt->myDoc);
10817 ctxt->myDoc = NULL;
10818 }
10819 if (sax != NULL) ctxt->sax = NULL;
10820 xmlFreeParserCtxt(ctxt);
10821
10822 return(ret);
10823}
10824
10825/**
10826 * xmlSAXParseDTD:
10827 * @sax: the SAX handler block
10828 * @ExternalID: a NAME* containing the External ID of the DTD
10829 * @SystemID: a NAME* containing the URL to the DTD
10830 *
10831 * Load and parse an external subset.
10832 *
10833 * Returns the resulting xmlDtdPtr or NULL in case of error.
10834 */
10835
10836xmlDtdPtr
10837xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10838 const xmlChar *SystemID) {
10839 xmlDtdPtr ret = NULL;
10840 xmlParserCtxtPtr ctxt;
10841 xmlParserInputPtr input = NULL;
10842 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010843 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010844
10845 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10846
10847 ctxt = xmlNewParserCtxt();
10848 if (ctxt == NULL) {
10849 return(NULL);
10850 }
10851
10852 /*
10853 * Set-up the SAX context
10854 */
10855 if (sax != NULL) {
10856 if (ctxt->sax != NULL)
10857 xmlFree(ctxt->sax);
10858 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010859 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010860 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010861
10862 /*
10863 * Canonicalise the system ID
10864 */
10865 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010866 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010867 xmlFreeParserCtxt(ctxt);
10868 return(NULL);
10869 }
Owen Taylor3473f882001-02-23 17:55:21 +000010870
10871 /*
10872 * Ask the Entity resolver to load the damn thing
10873 */
10874
10875 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010876 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010877 if (input == NULL) {
10878 if (sax != NULL) ctxt->sax = NULL;
10879 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010880 if (systemIdCanonic != NULL)
10881 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010882 return(NULL);
10883 }
10884
10885 /*
10886 * plug some encoding conversion routines here.
10887 */
10888 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010889 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10890 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10891 xmlSwitchEncoding(ctxt, enc);
10892 }
Owen Taylor3473f882001-02-23 17:55:21 +000010893
10894 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010895 input->filename = (char *) systemIdCanonic;
10896 else
10897 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010898 input->line = 1;
10899 input->col = 1;
10900 input->base = ctxt->input->cur;
10901 input->cur = ctxt->input->cur;
10902 input->free = NULL;
10903
10904 /*
10905 * let's parse that entity knowing it's an external subset.
10906 */
10907 ctxt->inSubset = 2;
10908 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10909 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10910 ExternalID, SystemID);
10911 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10912
10913 if (ctxt->myDoc != NULL) {
10914 if (ctxt->wellFormed) {
10915 ret = ctxt->myDoc->extSubset;
10916 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010917 if (ret != NULL) {
10918 xmlNodePtr tmp;
10919
10920 ret->doc = NULL;
10921 tmp = ret->children;
10922 while (tmp != NULL) {
10923 tmp->doc = NULL;
10924 tmp = tmp->next;
10925 }
10926 }
Owen Taylor3473f882001-02-23 17:55:21 +000010927 } else {
10928 ret = NULL;
10929 }
10930 xmlFreeDoc(ctxt->myDoc);
10931 ctxt->myDoc = NULL;
10932 }
10933 if (sax != NULL) ctxt->sax = NULL;
10934 xmlFreeParserCtxt(ctxt);
10935
10936 return(ret);
10937}
10938
Daniel Veillard4432df22003-09-28 18:58:27 +000010939
Owen Taylor3473f882001-02-23 17:55:21 +000010940/**
10941 * xmlParseDTD:
10942 * @ExternalID: a NAME* containing the External ID of the DTD
10943 * @SystemID: a NAME* containing the URL to the DTD
10944 *
10945 * Load and parse an external subset.
10946 *
10947 * Returns the resulting xmlDtdPtr or NULL in case of error.
10948 */
10949
10950xmlDtdPtr
10951xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10952 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10953}
Daniel Veillard4432df22003-09-28 18:58:27 +000010954#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010955
10956/************************************************************************
10957 * *
10958 * Front ends when parsing an Entity *
10959 * *
10960 ************************************************************************/
10961
10962/**
Owen Taylor3473f882001-02-23 17:55:21 +000010963 * xmlParseCtxtExternalEntity:
10964 * @ctx: the existing parsing context
10965 * @URL: the URL for the entity to load
10966 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010967 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010968 *
10969 * Parse an external general entity within an existing parsing context
10970 * An external general parsed entity is well-formed if it matches the
10971 * production labeled extParsedEnt.
10972 *
10973 * [78] extParsedEnt ::= TextDecl? content
10974 *
10975 * Returns 0 if the entity is well formed, -1 in case of args problem and
10976 * the parser error code otherwise
10977 */
10978
10979int
10980xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010981 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010982 xmlParserCtxtPtr ctxt;
10983 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010984 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010985 xmlSAXHandlerPtr oldsax = NULL;
10986 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010987 xmlChar start[4];
10988 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010989
Daniel Veillardce682bc2004-11-05 17:22:25 +000010990 if (ctx == NULL) return(-1);
10991
Owen Taylor3473f882001-02-23 17:55:21 +000010992 if (ctx->depth > 40) {
10993 return(XML_ERR_ENTITY_LOOP);
10994 }
10995
Daniel Veillardcda96922001-08-21 10:56:31 +000010996 if (lst != NULL)
10997 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010998 if ((URL == NULL) && (ID == NULL))
10999 return(-1);
11000 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11001 return(-1);
11002
11003
11004 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11005 if (ctxt == NULL) return(-1);
11006 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011007 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011008 oldsax = ctxt->sax;
11009 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011010 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011011 newDoc = xmlNewDoc(BAD_CAST "1.0");
11012 if (newDoc == NULL) {
11013 xmlFreeParserCtxt(ctxt);
11014 return(-1);
11015 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011016 if (ctx->myDoc->dict) {
11017 newDoc->dict = ctx->myDoc->dict;
11018 xmlDictReference(newDoc->dict);
11019 }
Owen Taylor3473f882001-02-23 17:55:21 +000011020 if (ctx->myDoc != NULL) {
11021 newDoc->intSubset = ctx->myDoc->intSubset;
11022 newDoc->extSubset = ctx->myDoc->extSubset;
11023 }
11024 if (ctx->myDoc->URL != NULL) {
11025 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11026 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011027 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11028 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011029 ctxt->sax = oldsax;
11030 xmlFreeParserCtxt(ctxt);
11031 newDoc->intSubset = NULL;
11032 newDoc->extSubset = NULL;
11033 xmlFreeDoc(newDoc);
11034 return(-1);
11035 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011036 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011037 nodePush(ctxt, newDoc->children);
11038 if (ctx->myDoc == NULL) {
11039 ctxt->myDoc = newDoc;
11040 } else {
11041 ctxt->myDoc = ctx->myDoc;
11042 newDoc->children->doc = ctx->myDoc;
11043 }
11044
Daniel Veillard87a764e2001-06-20 17:41:10 +000011045 /*
11046 * Get the 4 first bytes and decode the charset
11047 * if enc != XML_CHAR_ENCODING_NONE
11048 * plug some encoding conversion routines.
11049 */
11050 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011051 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11052 start[0] = RAW;
11053 start[1] = NXT(1);
11054 start[2] = NXT(2);
11055 start[3] = NXT(3);
11056 enc = xmlDetectCharEncoding(start, 4);
11057 if (enc != XML_CHAR_ENCODING_NONE) {
11058 xmlSwitchEncoding(ctxt, enc);
11059 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011060 }
11061
Owen Taylor3473f882001-02-23 17:55:21 +000011062 /*
11063 * Parse a possible text declaration first
11064 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011065 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011066 xmlParseTextDecl(ctxt);
11067 }
11068
11069 /*
11070 * Doing validity checking on chunk doesn't make sense
11071 */
11072 ctxt->instate = XML_PARSER_CONTENT;
11073 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011074 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011075 ctxt->loadsubset = ctx->loadsubset;
11076 ctxt->depth = ctx->depth + 1;
11077 ctxt->replaceEntities = ctx->replaceEntities;
11078 if (ctxt->validate) {
11079 ctxt->vctxt.error = ctx->vctxt.error;
11080 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011081 } else {
11082 ctxt->vctxt.error = NULL;
11083 ctxt->vctxt.warning = NULL;
11084 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011085 ctxt->vctxt.nodeTab = NULL;
11086 ctxt->vctxt.nodeNr = 0;
11087 ctxt->vctxt.nodeMax = 0;
11088 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011089 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11090 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011091 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11092 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11093 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011094 ctxt->dictNames = ctx->dictNames;
11095 ctxt->attsDefault = ctx->attsDefault;
11096 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011097 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011098
11099 xmlParseContent(ctxt);
11100
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011101 ctx->validate = ctxt->validate;
11102 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011103 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011104 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011105 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011106 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011107 }
11108 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011109 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011110 }
11111
11112 if (!ctxt->wellFormed) {
11113 if (ctxt->errNo == 0)
11114 ret = 1;
11115 else
11116 ret = ctxt->errNo;
11117 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011118 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011119 xmlNodePtr cur;
11120
11121 /*
11122 * Return the newly created nodeset after unlinking it from
11123 * they pseudo parent.
11124 */
11125 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011126 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011127 while (cur != NULL) {
11128 cur->parent = NULL;
11129 cur = cur->next;
11130 }
11131 newDoc->children->children = NULL;
11132 }
11133 ret = 0;
11134 }
11135 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011136 ctxt->dict = NULL;
11137 ctxt->attsDefault = NULL;
11138 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011139 xmlFreeParserCtxt(ctxt);
11140 newDoc->intSubset = NULL;
11141 newDoc->extSubset = NULL;
11142 xmlFreeDoc(newDoc);
11143
11144 return(ret);
11145}
11146
11147/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011148 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011149 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011150 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011151 * @sax: the SAX handler bloc (possibly NULL)
11152 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11153 * @depth: Used for loop detection, use 0
11154 * @URL: the URL for the entity to load
11155 * @ID: the System ID for the entity to load
11156 * @list: the return value for the set of parsed nodes
11157 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011158 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011159 *
11160 * Returns 0 if the entity is well formed, -1 in case of args problem and
11161 * the parser error code otherwise
11162 */
11163
Daniel Veillard7d515752003-09-26 19:12:37 +000011164static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011165xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11166 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011167 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011168 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011169 xmlParserCtxtPtr ctxt;
11170 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011171 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011172 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011173 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011174 xmlChar start[4];
11175 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011176
11177 if (depth > 40) {
11178 return(XML_ERR_ENTITY_LOOP);
11179 }
11180
11181
11182
11183 if (list != NULL)
11184 *list = NULL;
11185 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011186 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011187 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011188 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011189
11190
11191 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011192 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011193 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011194 if (oldctxt != NULL) {
11195 ctxt->_private = oldctxt->_private;
11196 ctxt->loadsubset = oldctxt->loadsubset;
11197 ctxt->validate = oldctxt->validate;
11198 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011199 ctxt->record_info = oldctxt->record_info;
11200 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11201 ctxt->node_seq.length = oldctxt->node_seq.length;
11202 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011203 } else {
11204 /*
11205 * Doing validity checking on chunk without context
11206 * doesn't make sense
11207 */
11208 ctxt->_private = NULL;
11209 ctxt->validate = 0;
11210 ctxt->external = 2;
11211 ctxt->loadsubset = 0;
11212 }
Owen Taylor3473f882001-02-23 17:55:21 +000011213 if (sax != NULL) {
11214 oldsax = ctxt->sax;
11215 ctxt->sax = sax;
11216 if (user_data != NULL)
11217 ctxt->userData = user_data;
11218 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011219 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011220 newDoc = xmlNewDoc(BAD_CAST "1.0");
11221 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011222 ctxt->node_seq.maximum = 0;
11223 ctxt->node_seq.length = 0;
11224 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011225 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011226 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011227 }
11228 if (doc != NULL) {
11229 newDoc->intSubset = doc->intSubset;
11230 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011231 newDoc->dict = doc->dict;
11232 } else if (oldctxt != NULL) {
11233 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011234 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011235 xmlDictReference(newDoc->dict);
11236
Owen Taylor3473f882001-02-23 17:55:21 +000011237 if (doc->URL != NULL) {
11238 newDoc->URL = xmlStrdup(doc->URL);
11239 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011240 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11241 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011242 if (sax != NULL)
11243 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011244 ctxt->node_seq.maximum = 0;
11245 ctxt->node_seq.length = 0;
11246 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011247 xmlFreeParserCtxt(ctxt);
11248 newDoc->intSubset = NULL;
11249 newDoc->extSubset = NULL;
11250 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011251 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011252 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011253 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011254 nodePush(ctxt, newDoc->children);
11255 if (doc == NULL) {
11256 ctxt->myDoc = newDoc;
11257 } else {
11258 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011259 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011260 }
11261
Daniel Veillard87a764e2001-06-20 17:41:10 +000011262 /*
11263 * Get the 4 first bytes and decode the charset
11264 * if enc != XML_CHAR_ENCODING_NONE
11265 * plug some encoding conversion routines.
11266 */
11267 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011268 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11269 start[0] = RAW;
11270 start[1] = NXT(1);
11271 start[2] = NXT(2);
11272 start[3] = NXT(3);
11273 enc = xmlDetectCharEncoding(start, 4);
11274 if (enc != XML_CHAR_ENCODING_NONE) {
11275 xmlSwitchEncoding(ctxt, enc);
11276 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011277 }
11278
Owen Taylor3473f882001-02-23 17:55:21 +000011279 /*
11280 * Parse a possible text declaration first
11281 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011282 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011283 xmlParseTextDecl(ctxt);
11284 }
11285
Owen Taylor3473f882001-02-23 17:55:21 +000011286 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011287 ctxt->depth = depth;
11288
11289 xmlParseContent(ctxt);
11290
Daniel Veillard561b7f82002-03-20 21:55:57 +000011291 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011292 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011293 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011294 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011295 }
11296 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011297 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011298 }
11299
11300 if (!ctxt->wellFormed) {
11301 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011302 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011303 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011304 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011305 } else {
11306 if (list != NULL) {
11307 xmlNodePtr cur;
11308
11309 /*
11310 * Return the newly created nodeset after unlinking it from
11311 * they pseudo parent.
11312 */
11313 cur = newDoc->children->children;
11314 *list = cur;
11315 while (cur != NULL) {
11316 cur->parent = NULL;
11317 cur = cur->next;
11318 }
11319 newDoc->children->children = NULL;
11320 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011321 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011322 }
11323 if (sax != NULL)
11324 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011325 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11326 oldctxt->node_seq.length = ctxt->node_seq.length;
11327 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011328 ctxt->node_seq.maximum = 0;
11329 ctxt->node_seq.length = 0;
11330 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011331 xmlFreeParserCtxt(ctxt);
11332 newDoc->intSubset = NULL;
11333 newDoc->extSubset = NULL;
11334 xmlFreeDoc(newDoc);
11335
11336 return(ret);
11337}
11338
Daniel Veillard81273902003-09-30 00:43:48 +000011339#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011340/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011341 * xmlParseExternalEntity:
11342 * @doc: the document the chunk pertains to
11343 * @sax: the SAX handler bloc (possibly NULL)
11344 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11345 * @depth: Used for loop detection, use 0
11346 * @URL: the URL for the entity to load
11347 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011348 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011349 *
11350 * Parse an external general entity
11351 * An external general parsed entity is well-formed if it matches the
11352 * production labeled extParsedEnt.
11353 *
11354 * [78] extParsedEnt ::= TextDecl? content
11355 *
11356 * Returns 0 if the entity is well formed, -1 in case of args problem and
11357 * the parser error code otherwise
11358 */
11359
11360int
11361xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011362 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011363 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011364 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011365}
11366
11367/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011368 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011369 * @doc: the document the chunk pertains to
11370 * @sax: the SAX handler bloc (possibly NULL)
11371 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11372 * @depth: Used for loop detection, use 0
11373 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011374 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011375 *
11376 * Parse a well-balanced chunk of an XML document
11377 * called by the parser
11378 * The allowed sequence for the Well Balanced Chunk is the one defined by
11379 * the content production in the XML grammar:
11380 *
11381 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11382 *
11383 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11384 * the parser error code otherwise
11385 */
11386
11387int
11388xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011389 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011390 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11391 depth, string, lst, 0 );
11392}
Daniel Veillard81273902003-09-30 00:43:48 +000011393#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011394
11395/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011396 * xmlParseBalancedChunkMemoryInternal:
11397 * @oldctxt: the existing parsing context
11398 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11399 * @user_data: the user data field for the parser context
11400 * @lst: the return value for the set of parsed nodes
11401 *
11402 *
11403 * Parse a well-balanced chunk of an XML document
11404 * called by the parser
11405 * The allowed sequence for the Well Balanced Chunk is the one defined by
11406 * the content production in the XML grammar:
11407 *
11408 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11409 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011410 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11411 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011412 *
11413 * In case recover is set to 1, the nodelist will not be empty even if
11414 * the parsed chunk is not well balanced.
11415 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011416static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011417xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11418 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11419 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011420 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011421 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011422 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011423 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011424 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011425 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011426 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011427
11428 if (oldctxt->depth > 40) {
11429 return(XML_ERR_ENTITY_LOOP);
11430 }
11431
11432
11433 if (lst != NULL)
11434 *lst = NULL;
11435 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011436 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011437
11438 size = xmlStrlen(string);
11439
11440 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011441 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011442 if (user_data != NULL)
11443 ctxt->userData = user_data;
11444 else
11445 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011446 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11447 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011448 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11449 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11450 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011451
11452 oldsax = ctxt->sax;
11453 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011454 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011455 ctxt->replaceEntities = oldctxt->replaceEntities;
11456 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011457
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011458 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011459 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011460 newDoc = xmlNewDoc(BAD_CAST "1.0");
11461 if (newDoc == NULL) {
11462 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011463 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011464 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011465 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011466 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011467 newDoc->dict = ctxt->dict;
11468 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011469 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011470 } else {
11471 ctxt->myDoc = oldctxt->myDoc;
11472 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011473 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011474 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011475 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11476 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011477 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011478 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011479 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011480 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011481 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011482 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011483 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011484 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011485 ctxt->myDoc->children = NULL;
11486 ctxt->myDoc->last = NULL;
11487 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011488 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011489 ctxt->instate = XML_PARSER_CONTENT;
11490 ctxt->depth = oldctxt->depth + 1;
11491
Daniel Veillard328f48c2002-11-15 15:24:34 +000011492 ctxt->validate = 0;
11493 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011494 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11495 /*
11496 * ID/IDREF registration will be done in xmlValidateElement below
11497 */
11498 ctxt->loadsubset |= XML_SKIP_IDS;
11499 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011500 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011501 ctxt->attsDefault = oldctxt->attsDefault;
11502 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011503
Daniel Veillard68e9e742002-11-16 15:35:11 +000011504 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011505 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011506 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011507 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011508 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011509 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011510 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011511 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011512 }
11513
11514 if (!ctxt->wellFormed) {
11515 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011516 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011517 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011518 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011519 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011520 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011521 }
11522
William M. Brack7b9154b2003-09-27 19:23:50 +000011523 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011524 xmlNodePtr cur;
11525
11526 /*
11527 * Return the newly created nodeset after unlinking it from
11528 * they pseudo parent.
11529 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011530 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011531 *lst = cur;
11532 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011533#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011534 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11535 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11536 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011537 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11538 oldctxt->myDoc, cur);
11539 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011540#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011541 cur->parent = NULL;
11542 cur = cur->next;
11543 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011544 ctxt->myDoc->children->children = NULL;
11545 }
11546 if (ctxt->myDoc != NULL) {
11547 xmlFreeNode(ctxt->myDoc->children);
11548 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011549 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011550 }
11551
11552 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011553 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011554 ctxt->attsDefault = NULL;
11555 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011556 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011557 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011558 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011559 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011560
11561 return(ret);
11562}
11563
Daniel Veillard29b17482004-08-16 00:39:03 +000011564/**
11565 * xmlParseInNodeContext:
11566 * @node: the context node
11567 * @data: the input string
11568 * @datalen: the input string length in bytes
11569 * @options: a combination of xmlParserOption
11570 * @lst: the return value for the set of parsed nodes
11571 *
11572 * Parse a well-balanced chunk of an XML document
11573 * within the context (DTD, namespaces, etc ...) of the given node.
11574 *
11575 * The allowed sequence for the data is a Well Balanced Chunk defined by
11576 * the content production in the XML grammar:
11577 *
11578 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11579 *
11580 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11581 * error code otherwise
11582 */
11583xmlParserErrors
11584xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11585 int options, xmlNodePtr *lst) {
11586#ifdef SAX2
11587 xmlParserCtxtPtr ctxt;
11588 xmlDocPtr doc = NULL;
11589 xmlNodePtr fake, cur;
11590 int nsnr = 0;
11591
11592 xmlParserErrors ret = XML_ERR_OK;
11593
11594 /*
11595 * check all input parameters, grab the document
11596 */
11597 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11598 return(XML_ERR_INTERNAL_ERROR);
11599 switch (node->type) {
11600 case XML_ELEMENT_NODE:
11601 case XML_ATTRIBUTE_NODE:
11602 case XML_TEXT_NODE:
11603 case XML_CDATA_SECTION_NODE:
11604 case XML_ENTITY_REF_NODE:
11605 case XML_PI_NODE:
11606 case XML_COMMENT_NODE:
11607 case XML_DOCUMENT_NODE:
11608 case XML_HTML_DOCUMENT_NODE:
11609 break;
11610 default:
11611 return(XML_ERR_INTERNAL_ERROR);
11612
11613 }
11614 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11615 (node->type != XML_DOCUMENT_NODE) &&
11616 (node->type != XML_HTML_DOCUMENT_NODE))
11617 node = node->parent;
11618 if (node == NULL)
11619 return(XML_ERR_INTERNAL_ERROR);
11620 if (node->type == XML_ELEMENT_NODE)
11621 doc = node->doc;
11622 else
11623 doc = (xmlDocPtr) node;
11624 if (doc == NULL)
11625 return(XML_ERR_INTERNAL_ERROR);
11626
11627 /*
11628 * allocate a context and set-up everything not related to the
11629 * node position in the tree
11630 */
11631 if (doc->type == XML_DOCUMENT_NODE)
11632 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11633#ifdef LIBXML_HTML_ENABLED
11634 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11635 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11636#endif
11637 else
11638 return(XML_ERR_INTERNAL_ERROR);
11639
11640 if (ctxt == NULL)
11641 return(XML_ERR_NO_MEMORY);
11642 fake = xmlNewComment(NULL);
11643 if (fake == NULL) {
11644 xmlFreeParserCtxt(ctxt);
11645 return(XML_ERR_NO_MEMORY);
11646 }
11647 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011648
11649 /*
11650 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11651 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11652 * we must wait until the last moment to free the original one.
11653 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011654 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011655 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011656 xmlDictFree(ctxt->dict);
11657 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011658 } else
11659 options |= XML_PARSE_NODICT;
11660
11661 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011662 xmlDetectSAX2(ctxt);
11663 ctxt->myDoc = doc;
11664
11665 if (node->type == XML_ELEMENT_NODE) {
11666 nodePush(ctxt, node);
11667 /*
11668 * initialize the SAX2 namespaces stack
11669 */
11670 cur = node;
11671 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11672 xmlNsPtr ns = cur->nsDef;
11673 const xmlChar *iprefix, *ihref;
11674
11675 while (ns != NULL) {
11676 if (ctxt->dict) {
11677 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11678 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11679 } else {
11680 iprefix = ns->prefix;
11681 ihref = ns->href;
11682 }
11683
11684 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11685 nsPush(ctxt, iprefix, ihref);
11686 nsnr++;
11687 }
11688 ns = ns->next;
11689 }
11690 cur = cur->parent;
11691 }
11692 ctxt->instate = XML_PARSER_CONTENT;
11693 }
11694
11695 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11696 /*
11697 * ID/IDREF registration will be done in xmlValidateElement below
11698 */
11699 ctxt->loadsubset |= XML_SKIP_IDS;
11700 }
11701
11702 xmlParseContent(ctxt);
11703 nsPop(ctxt, nsnr);
11704 if ((RAW == '<') && (NXT(1) == '/')) {
11705 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11706 } else if (RAW != 0) {
11707 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11708 }
11709 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11710 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11711 ctxt->wellFormed = 0;
11712 }
11713
11714 if (!ctxt->wellFormed) {
11715 if (ctxt->errNo == 0)
11716 ret = XML_ERR_INTERNAL_ERROR;
11717 else
11718 ret = (xmlParserErrors)ctxt->errNo;
11719 } else {
11720 ret = XML_ERR_OK;
11721 }
11722
11723 /*
11724 * Return the newly created nodeset after unlinking it from
11725 * the pseudo sibling.
11726 */
11727
11728 cur = fake->next;
11729 fake->next = NULL;
11730 node->last = fake;
11731
11732 if (cur != NULL) {
11733 cur->prev = NULL;
11734 }
11735
11736 *lst = cur;
11737
11738 while (cur != NULL) {
11739 cur->parent = NULL;
11740 cur = cur->next;
11741 }
11742
11743 xmlUnlinkNode(fake);
11744 xmlFreeNode(fake);
11745
11746
11747 if (ret != XML_ERR_OK) {
11748 xmlFreeNodeList(*lst);
11749 *lst = NULL;
11750 }
William M. Brackc3f81342004-10-03 01:22:44 +000011751
William M. Brackb7b54de2004-10-06 16:38:01 +000011752 if (doc->dict != NULL)
11753 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011754 xmlFreeParserCtxt(ctxt);
11755
11756 return(ret);
11757#else /* !SAX2 */
11758 return(XML_ERR_INTERNAL_ERROR);
11759#endif
11760}
11761
Daniel Veillard81273902003-09-30 00:43:48 +000011762#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011763/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011764 * xmlParseBalancedChunkMemoryRecover:
11765 * @doc: the document the chunk pertains to
11766 * @sax: the SAX handler bloc (possibly NULL)
11767 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11768 * @depth: Used for loop detection, use 0
11769 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11770 * @lst: the return value for the set of parsed nodes
11771 * @recover: return nodes even if the data is broken (use 0)
11772 *
11773 *
11774 * Parse a well-balanced chunk of an XML document
11775 * called by the parser
11776 * The allowed sequence for the Well Balanced Chunk is the one defined by
11777 * the content production in the XML grammar:
11778 *
11779 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11780 *
11781 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11782 * the parser error code otherwise
11783 *
11784 * In case recover is set to 1, the nodelist will not be empty even if
11785 * the parsed chunk is not well balanced.
11786 */
11787int
11788xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11789 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11790 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011791 xmlParserCtxtPtr ctxt;
11792 xmlDocPtr newDoc;
11793 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011794 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011795 int size;
11796 int ret = 0;
11797
11798 if (depth > 40) {
11799 return(XML_ERR_ENTITY_LOOP);
11800 }
11801
11802
Daniel Veillardcda96922001-08-21 10:56:31 +000011803 if (lst != NULL)
11804 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011805 if (string == NULL)
11806 return(-1);
11807
11808 size = xmlStrlen(string);
11809
11810 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11811 if (ctxt == NULL) return(-1);
11812 ctxt->userData = ctxt;
11813 if (sax != NULL) {
11814 oldsax = ctxt->sax;
11815 ctxt->sax = sax;
11816 if (user_data != NULL)
11817 ctxt->userData = user_data;
11818 }
11819 newDoc = xmlNewDoc(BAD_CAST "1.0");
11820 if (newDoc == NULL) {
11821 xmlFreeParserCtxt(ctxt);
11822 return(-1);
11823 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011824 if ((doc != NULL) && (doc->dict != NULL)) {
11825 xmlDictFree(ctxt->dict);
11826 ctxt->dict = doc->dict;
11827 xmlDictReference(ctxt->dict);
11828 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11829 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11830 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11831 ctxt->dictNames = 1;
11832 } else {
11833 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11834 }
Owen Taylor3473f882001-02-23 17:55:21 +000011835 if (doc != NULL) {
11836 newDoc->intSubset = doc->intSubset;
11837 newDoc->extSubset = doc->extSubset;
11838 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011839 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11840 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011841 if (sax != NULL)
11842 ctxt->sax = oldsax;
11843 xmlFreeParserCtxt(ctxt);
11844 newDoc->intSubset = NULL;
11845 newDoc->extSubset = NULL;
11846 xmlFreeDoc(newDoc);
11847 return(-1);
11848 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011849 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11850 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011851 if (doc == NULL) {
11852 ctxt->myDoc = newDoc;
11853 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011854 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011855 newDoc->children->doc = doc;
11856 }
11857 ctxt->instate = XML_PARSER_CONTENT;
11858 ctxt->depth = depth;
11859
11860 /*
11861 * Doing validity checking on chunk doesn't make sense
11862 */
11863 ctxt->validate = 0;
11864 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011865 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011866
Daniel Veillardb39bc392002-10-26 19:29:51 +000011867 if ( doc != NULL ){
11868 content = doc->children;
11869 doc->children = NULL;
11870 xmlParseContent(ctxt);
11871 doc->children = content;
11872 }
11873 else {
11874 xmlParseContent(ctxt);
11875 }
Owen Taylor3473f882001-02-23 17:55:21 +000011876 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011877 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011878 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011879 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011880 }
11881 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011882 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011883 }
11884
11885 if (!ctxt->wellFormed) {
11886 if (ctxt->errNo == 0)
11887 ret = 1;
11888 else
11889 ret = ctxt->errNo;
11890 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011891 ret = 0;
11892 }
11893
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011894 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11895 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011896
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011897 /*
11898 * Return the newly created nodeset after unlinking it from
11899 * they pseudo parent.
11900 */
11901 cur = newDoc->children->children;
11902 *lst = cur;
11903 while (cur != NULL) {
11904 xmlSetTreeDoc(cur, doc);
11905 cur->parent = NULL;
11906 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011908 newDoc->children->children = NULL;
11909 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011910
Owen Taylor3473f882001-02-23 17:55:21 +000011911 if (sax != NULL)
11912 ctxt->sax = oldsax;
11913 xmlFreeParserCtxt(ctxt);
11914 newDoc->intSubset = NULL;
11915 newDoc->extSubset = NULL;
11916 xmlFreeDoc(newDoc);
11917
11918 return(ret);
11919}
11920
11921/**
11922 * xmlSAXParseEntity:
11923 * @sax: the SAX handler block
11924 * @filename: the filename
11925 *
11926 * parse an XML external entity out of context and build a tree.
11927 * It use the given SAX function block to handle the parsing callback.
11928 * If sax is NULL, fallback to the default DOM tree building routines.
11929 *
11930 * [78] extParsedEnt ::= TextDecl? content
11931 *
11932 * This correspond to a "Well Balanced" chunk
11933 *
11934 * Returns the resulting document tree
11935 */
11936
11937xmlDocPtr
11938xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11939 xmlDocPtr ret;
11940 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011941
11942 ctxt = xmlCreateFileParserCtxt(filename);
11943 if (ctxt == NULL) {
11944 return(NULL);
11945 }
11946 if (sax != NULL) {
11947 if (ctxt->sax != NULL)
11948 xmlFree(ctxt->sax);
11949 ctxt->sax = sax;
11950 ctxt->userData = NULL;
11951 }
11952
Owen Taylor3473f882001-02-23 17:55:21 +000011953 xmlParseExtParsedEnt(ctxt);
11954
11955 if (ctxt->wellFormed)
11956 ret = ctxt->myDoc;
11957 else {
11958 ret = NULL;
11959 xmlFreeDoc(ctxt->myDoc);
11960 ctxt->myDoc = NULL;
11961 }
11962 if (sax != NULL)
11963 ctxt->sax = NULL;
11964 xmlFreeParserCtxt(ctxt);
11965
11966 return(ret);
11967}
11968
11969/**
11970 * xmlParseEntity:
11971 * @filename: the filename
11972 *
11973 * parse an XML external entity out of context and build a tree.
11974 *
11975 * [78] extParsedEnt ::= TextDecl? content
11976 *
11977 * This correspond to a "Well Balanced" chunk
11978 *
11979 * Returns the resulting document tree
11980 */
11981
11982xmlDocPtr
11983xmlParseEntity(const char *filename) {
11984 return(xmlSAXParseEntity(NULL, filename));
11985}
Daniel Veillard81273902003-09-30 00:43:48 +000011986#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011987
11988/**
11989 * xmlCreateEntityParserCtxt:
11990 * @URL: the entity URL
11991 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011992 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011993 *
11994 * Create a parser context for an external entity
11995 * Automatic support for ZLIB/Compress compressed document is provided
11996 * by default if found at compile-time.
11997 *
11998 * Returns the new parser context or NULL
11999 */
12000xmlParserCtxtPtr
12001xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12002 const xmlChar *base) {
12003 xmlParserCtxtPtr ctxt;
12004 xmlParserInputPtr inputStream;
12005 char *directory = NULL;
12006 xmlChar *uri;
12007
12008 ctxt = xmlNewParserCtxt();
12009 if (ctxt == NULL) {
12010 return(NULL);
12011 }
12012
12013 uri = xmlBuildURI(URL, base);
12014
12015 if (uri == NULL) {
12016 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12017 if (inputStream == NULL) {
12018 xmlFreeParserCtxt(ctxt);
12019 return(NULL);
12020 }
12021
12022 inputPush(ctxt, inputStream);
12023
12024 if ((ctxt->directory == NULL) && (directory == NULL))
12025 directory = xmlParserGetDirectory((char *)URL);
12026 if ((ctxt->directory == NULL) && (directory != NULL))
12027 ctxt->directory = directory;
12028 } else {
12029 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12030 if (inputStream == NULL) {
12031 xmlFree(uri);
12032 xmlFreeParserCtxt(ctxt);
12033 return(NULL);
12034 }
12035
12036 inputPush(ctxt, inputStream);
12037
12038 if ((ctxt->directory == NULL) && (directory == NULL))
12039 directory = xmlParserGetDirectory((char *)uri);
12040 if ((ctxt->directory == NULL) && (directory != NULL))
12041 ctxt->directory = directory;
12042 xmlFree(uri);
12043 }
Owen Taylor3473f882001-02-23 17:55:21 +000012044 return(ctxt);
12045}
12046
12047/************************************************************************
12048 * *
12049 * Front ends when parsing from a file *
12050 * *
12051 ************************************************************************/
12052
12053/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012054 * xmlCreateURLParserCtxt:
12055 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012056 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012057 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012058 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012059 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012060 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012061 *
12062 * Returns the new parser context or NULL
12063 */
12064xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012065xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012066{
12067 xmlParserCtxtPtr ctxt;
12068 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012069 char *directory = NULL;
12070
Owen Taylor3473f882001-02-23 17:55:21 +000012071 ctxt = xmlNewParserCtxt();
12072 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012073 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012074 return(NULL);
12075 }
12076
Daniel Veillarddf292f72005-01-16 19:00:15 +000012077 if (options)
12078 xmlCtxtUseOptions(ctxt, options);
12079 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012080
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012081 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012082 if (inputStream == NULL) {
12083 xmlFreeParserCtxt(ctxt);
12084 return(NULL);
12085 }
12086
Owen Taylor3473f882001-02-23 17:55:21 +000012087 inputPush(ctxt, inputStream);
12088 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012089 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012090 if ((ctxt->directory == NULL) && (directory != NULL))
12091 ctxt->directory = directory;
12092
12093 return(ctxt);
12094}
12095
Daniel Veillard61b93382003-11-03 14:28:31 +000012096/**
12097 * xmlCreateFileParserCtxt:
12098 * @filename: the filename
12099 *
12100 * Create a parser context for a file content.
12101 * Automatic support for ZLIB/Compress compressed document is provided
12102 * by default if found at compile-time.
12103 *
12104 * Returns the new parser context or NULL
12105 */
12106xmlParserCtxtPtr
12107xmlCreateFileParserCtxt(const char *filename)
12108{
12109 return(xmlCreateURLParserCtxt(filename, 0));
12110}
12111
Daniel Veillard81273902003-09-30 00:43:48 +000012112#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012113/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012114 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012115 * @sax: the SAX handler block
12116 * @filename: the filename
12117 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12118 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012119 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012120 *
12121 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12122 * compressed document is provided by default if found at compile-time.
12123 * It use the given SAX function block to handle the parsing callback.
12124 * If sax is NULL, fallback to the default DOM tree building routines.
12125 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012126 * User data (void *) is stored within the parser context in the
12127 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012128 *
Owen Taylor3473f882001-02-23 17:55:21 +000012129 * Returns the resulting document tree
12130 */
12131
12132xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012133xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12134 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012135 xmlDocPtr ret;
12136 xmlParserCtxtPtr ctxt;
12137 char *directory = NULL;
12138
Daniel Veillard635ef722001-10-29 11:48:19 +000012139 xmlInitParser();
12140
Owen Taylor3473f882001-02-23 17:55:21 +000012141 ctxt = xmlCreateFileParserCtxt(filename);
12142 if (ctxt == NULL) {
12143 return(NULL);
12144 }
12145 if (sax != NULL) {
12146 if (ctxt->sax != NULL)
12147 xmlFree(ctxt->sax);
12148 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012149 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012150 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012151 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012152 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012153 }
Owen Taylor3473f882001-02-23 17:55:21 +000012154
12155 if ((ctxt->directory == NULL) && (directory == NULL))
12156 directory = xmlParserGetDirectory(filename);
12157 if ((ctxt->directory == NULL) && (directory != NULL))
12158 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12159
Daniel Veillarddad3f682002-11-17 16:47:27 +000012160 ctxt->recovery = recovery;
12161
Owen Taylor3473f882001-02-23 17:55:21 +000012162 xmlParseDocument(ctxt);
12163
William M. Brackc07329e2003-09-08 01:57:30 +000012164 if ((ctxt->wellFormed) || recovery) {
12165 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012166 if (ret != NULL) {
12167 if (ctxt->input->buf->compressed > 0)
12168 ret->compression = 9;
12169 else
12170 ret->compression = ctxt->input->buf->compressed;
12171 }
William M. Brackc07329e2003-09-08 01:57:30 +000012172 }
Owen Taylor3473f882001-02-23 17:55:21 +000012173 else {
12174 ret = NULL;
12175 xmlFreeDoc(ctxt->myDoc);
12176 ctxt->myDoc = NULL;
12177 }
12178 if (sax != NULL)
12179 ctxt->sax = NULL;
12180 xmlFreeParserCtxt(ctxt);
12181
12182 return(ret);
12183}
12184
12185/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012186 * xmlSAXParseFile:
12187 * @sax: the SAX handler block
12188 * @filename: the filename
12189 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12190 * documents
12191 *
12192 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12193 * compressed document is provided by default if found at compile-time.
12194 * It use the given SAX function block to handle the parsing callback.
12195 * If sax is NULL, fallback to the default DOM tree building routines.
12196 *
12197 * Returns the resulting document tree
12198 */
12199
12200xmlDocPtr
12201xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12202 int recovery) {
12203 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12204}
12205
12206/**
Owen Taylor3473f882001-02-23 17:55:21 +000012207 * xmlRecoverDoc:
12208 * @cur: a pointer to an array of xmlChar
12209 *
12210 * parse an XML in-memory document and build a tree.
12211 * In the case the document is not Well Formed, a tree is built anyway
12212 *
12213 * Returns the resulting document tree
12214 */
12215
12216xmlDocPtr
12217xmlRecoverDoc(xmlChar *cur) {
12218 return(xmlSAXParseDoc(NULL, cur, 1));
12219}
12220
12221/**
12222 * xmlParseFile:
12223 * @filename: the filename
12224 *
12225 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12226 * compressed document is provided by default if found at compile-time.
12227 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012228 * Returns the resulting document tree if the file was wellformed,
12229 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012230 */
12231
12232xmlDocPtr
12233xmlParseFile(const char *filename) {
12234 return(xmlSAXParseFile(NULL, filename, 0));
12235}
12236
12237/**
12238 * xmlRecoverFile:
12239 * @filename: the filename
12240 *
12241 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12242 * compressed document is provided by default if found at compile-time.
12243 * In the case the document is not Well Formed, a tree is built anyway
12244 *
12245 * Returns the resulting document tree
12246 */
12247
12248xmlDocPtr
12249xmlRecoverFile(const char *filename) {
12250 return(xmlSAXParseFile(NULL, filename, 1));
12251}
12252
12253
12254/**
12255 * xmlSetupParserForBuffer:
12256 * @ctxt: an XML parser context
12257 * @buffer: a xmlChar * buffer
12258 * @filename: a file name
12259 *
12260 * Setup the parser context to parse a new buffer; Clears any prior
12261 * contents from the parser context. The buffer parameter must not be
12262 * NULL, but the filename parameter can be
12263 */
12264void
12265xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12266 const char* filename)
12267{
12268 xmlParserInputPtr input;
12269
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012270 if ((ctxt == NULL) || (buffer == NULL))
12271 return;
12272
Owen Taylor3473f882001-02-23 17:55:21 +000012273 input = xmlNewInputStream(ctxt);
12274 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012275 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012276 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012277 return;
12278 }
12279
12280 xmlClearParserCtxt(ctxt);
12281 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012282 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012283 input->base = buffer;
12284 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012285 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012286 inputPush(ctxt, input);
12287}
12288
12289/**
12290 * xmlSAXUserParseFile:
12291 * @sax: a SAX handler
12292 * @user_data: The user data returned on SAX callbacks
12293 * @filename: a file name
12294 *
12295 * parse an XML file and call the given SAX handler routines.
12296 * Automatic support for ZLIB/Compress compressed document is provided
12297 *
12298 * Returns 0 in case of success or a error number otherwise
12299 */
12300int
12301xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12302 const char *filename) {
12303 int ret = 0;
12304 xmlParserCtxtPtr ctxt;
12305
12306 ctxt = xmlCreateFileParserCtxt(filename);
12307 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012308#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012309 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012310#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012311 xmlFree(ctxt->sax);
12312 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012313 xmlDetectSAX2(ctxt);
12314
Owen Taylor3473f882001-02-23 17:55:21 +000012315 if (user_data != NULL)
12316 ctxt->userData = user_data;
12317
12318 xmlParseDocument(ctxt);
12319
12320 if (ctxt->wellFormed)
12321 ret = 0;
12322 else {
12323 if (ctxt->errNo != 0)
12324 ret = ctxt->errNo;
12325 else
12326 ret = -1;
12327 }
12328 if (sax != NULL)
12329 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012330 if (ctxt->myDoc != NULL) {
12331 xmlFreeDoc(ctxt->myDoc);
12332 ctxt->myDoc = NULL;
12333 }
Owen Taylor3473f882001-02-23 17:55:21 +000012334 xmlFreeParserCtxt(ctxt);
12335
12336 return ret;
12337}
Daniel Veillard81273902003-09-30 00:43:48 +000012338#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012339
12340/************************************************************************
12341 * *
12342 * Front ends when parsing from memory *
12343 * *
12344 ************************************************************************/
12345
12346/**
12347 * xmlCreateMemoryParserCtxt:
12348 * @buffer: a pointer to a char array
12349 * @size: the size of the array
12350 *
12351 * Create a parser context for an XML in-memory document.
12352 *
12353 * Returns the new parser context or NULL
12354 */
12355xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012356xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012357 xmlParserCtxtPtr ctxt;
12358 xmlParserInputPtr input;
12359 xmlParserInputBufferPtr buf;
12360
12361 if (buffer == NULL)
12362 return(NULL);
12363 if (size <= 0)
12364 return(NULL);
12365
12366 ctxt = xmlNewParserCtxt();
12367 if (ctxt == NULL)
12368 return(NULL);
12369
Daniel Veillard53350552003-09-18 13:35:51 +000012370 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012371 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012372 if (buf == NULL) {
12373 xmlFreeParserCtxt(ctxt);
12374 return(NULL);
12375 }
Owen Taylor3473f882001-02-23 17:55:21 +000012376
12377 input = xmlNewInputStream(ctxt);
12378 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012379 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012380 xmlFreeParserCtxt(ctxt);
12381 return(NULL);
12382 }
12383
12384 input->filename = NULL;
12385 input->buf = buf;
12386 input->base = input->buf->buffer->content;
12387 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012388 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012389
12390 inputPush(ctxt, input);
12391 return(ctxt);
12392}
12393
Daniel Veillard81273902003-09-30 00:43:48 +000012394#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012395/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012396 * xmlSAXParseMemoryWithData:
12397 * @sax: the SAX handler block
12398 * @buffer: an pointer to a char array
12399 * @size: the size of the array
12400 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12401 * documents
12402 * @data: the userdata
12403 *
12404 * parse an XML in-memory block and use the given SAX function block
12405 * to handle the parsing callback. If sax is NULL, fallback to the default
12406 * DOM tree building routines.
12407 *
12408 * User data (void *) is stored within the parser context in the
12409 * context's _private member, so it is available nearly everywhere in libxml
12410 *
12411 * Returns the resulting document tree
12412 */
12413
12414xmlDocPtr
12415xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12416 int size, int recovery, void *data) {
12417 xmlDocPtr ret;
12418 xmlParserCtxtPtr ctxt;
12419
12420 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12421 if (ctxt == NULL) return(NULL);
12422 if (sax != NULL) {
12423 if (ctxt->sax != NULL)
12424 xmlFree(ctxt->sax);
12425 ctxt->sax = sax;
12426 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012427 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012428 if (data!=NULL) {
12429 ctxt->_private=data;
12430 }
12431
Daniel Veillardadba5f12003-04-04 16:09:01 +000012432 ctxt->recovery = recovery;
12433
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012434 xmlParseDocument(ctxt);
12435
12436 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12437 else {
12438 ret = NULL;
12439 xmlFreeDoc(ctxt->myDoc);
12440 ctxt->myDoc = NULL;
12441 }
12442 if (sax != NULL)
12443 ctxt->sax = NULL;
12444 xmlFreeParserCtxt(ctxt);
12445
12446 return(ret);
12447}
12448
12449/**
Owen Taylor3473f882001-02-23 17:55:21 +000012450 * xmlSAXParseMemory:
12451 * @sax: the SAX handler block
12452 * @buffer: an pointer to a char array
12453 * @size: the size of the array
12454 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12455 * documents
12456 *
12457 * parse an XML in-memory block and use the given SAX function block
12458 * to handle the parsing callback. If sax is NULL, fallback to the default
12459 * DOM tree building routines.
12460 *
12461 * Returns the resulting document tree
12462 */
12463xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012464xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12465 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012466 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012467}
12468
12469/**
12470 * xmlParseMemory:
12471 * @buffer: an pointer to a char array
12472 * @size: the size of the array
12473 *
12474 * parse an XML in-memory block and build a tree.
12475 *
12476 * Returns the resulting document tree
12477 */
12478
Daniel Veillard50822cb2001-07-26 20:05:51 +000012479xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012480 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12481}
12482
12483/**
12484 * xmlRecoverMemory:
12485 * @buffer: an pointer to a char array
12486 * @size: the size of the array
12487 *
12488 * parse an XML in-memory block and build a tree.
12489 * In the case the document is not Well Formed, a tree is built anyway
12490 *
12491 * Returns the resulting document tree
12492 */
12493
Daniel Veillard50822cb2001-07-26 20:05:51 +000012494xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012495 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12496}
12497
12498/**
12499 * xmlSAXUserParseMemory:
12500 * @sax: a SAX handler
12501 * @user_data: The user data returned on SAX callbacks
12502 * @buffer: an in-memory XML document input
12503 * @size: the length of the XML document in bytes
12504 *
12505 * A better SAX parsing routine.
12506 * parse an XML in-memory buffer and call the given SAX handler routines.
12507 *
12508 * Returns 0 in case of success or a error number otherwise
12509 */
12510int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012511 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012512 int ret = 0;
12513 xmlParserCtxtPtr ctxt;
12514 xmlSAXHandlerPtr oldsax = NULL;
12515
Daniel Veillard9e923512002-08-14 08:48:52 +000012516 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012517 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12518 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012519 oldsax = ctxt->sax;
12520 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012521 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012522 if (user_data != NULL)
12523 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012524
12525 xmlParseDocument(ctxt);
12526
12527 if (ctxt->wellFormed)
12528 ret = 0;
12529 else {
12530 if (ctxt->errNo != 0)
12531 ret = ctxt->errNo;
12532 else
12533 ret = -1;
12534 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012535 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012536 if (ctxt->myDoc != NULL) {
12537 xmlFreeDoc(ctxt->myDoc);
12538 ctxt->myDoc = NULL;
12539 }
Owen Taylor3473f882001-02-23 17:55:21 +000012540 xmlFreeParserCtxt(ctxt);
12541
12542 return ret;
12543}
Daniel Veillard81273902003-09-30 00:43:48 +000012544#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012545
12546/**
12547 * xmlCreateDocParserCtxt:
12548 * @cur: a pointer to an array of xmlChar
12549 *
12550 * Creates a parser context for an XML in-memory document.
12551 *
12552 * Returns the new parser context or NULL
12553 */
12554xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012555xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012556 int len;
12557
12558 if (cur == NULL)
12559 return(NULL);
12560 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012562}
12563
Daniel Veillard81273902003-09-30 00:43:48 +000012564#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012565/**
12566 * xmlSAXParseDoc:
12567 * @sax: the SAX handler block
12568 * @cur: a pointer to an array of xmlChar
12569 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12570 * documents
12571 *
12572 * parse an XML in-memory document and build a tree.
12573 * It use the given SAX function block to handle the parsing callback.
12574 * If sax is NULL, fallback to the default DOM tree building routines.
12575 *
12576 * Returns the resulting document tree
12577 */
12578
12579xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012580xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012581 xmlDocPtr ret;
12582 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012583 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012584
Daniel Veillard38936062004-11-04 17:45:11 +000012585 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012586
12587
12588 ctxt = xmlCreateDocParserCtxt(cur);
12589 if (ctxt == NULL) return(NULL);
12590 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012591 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012592 ctxt->sax = sax;
12593 ctxt->userData = NULL;
12594 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012595 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012596
12597 xmlParseDocument(ctxt);
12598 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12599 else {
12600 ret = NULL;
12601 xmlFreeDoc(ctxt->myDoc);
12602 ctxt->myDoc = NULL;
12603 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012604 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012605 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012606 xmlFreeParserCtxt(ctxt);
12607
12608 return(ret);
12609}
12610
12611/**
12612 * xmlParseDoc:
12613 * @cur: a pointer to an array of xmlChar
12614 *
12615 * parse an XML in-memory document and build a tree.
12616 *
12617 * Returns the resulting document tree
12618 */
12619
12620xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012621xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012622 return(xmlSAXParseDoc(NULL, cur, 0));
12623}
Daniel Veillard81273902003-09-30 00:43:48 +000012624#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012625
Daniel Veillard81273902003-09-30 00:43:48 +000012626#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012627/************************************************************************
12628 * *
12629 * Specific function to keep track of entities references *
12630 * and used by the XSLT debugger *
12631 * *
12632 ************************************************************************/
12633
12634static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12635
12636/**
12637 * xmlAddEntityReference:
12638 * @ent : A valid entity
12639 * @firstNode : A valid first node for children of entity
12640 * @lastNode : A valid last node of children entity
12641 *
12642 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12643 */
12644static void
12645xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12646 xmlNodePtr lastNode)
12647{
12648 if (xmlEntityRefFunc != NULL) {
12649 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12650 }
12651}
12652
12653
12654/**
12655 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012656 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012657 *
12658 * Set the function to call call back when a xml reference has been made
12659 */
12660void
12661xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12662{
12663 xmlEntityRefFunc = func;
12664}
Daniel Veillard81273902003-09-30 00:43:48 +000012665#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012666
12667/************************************************************************
12668 * *
12669 * Miscellaneous *
12670 * *
12671 ************************************************************************/
12672
12673#ifdef LIBXML_XPATH_ENABLED
12674#include <libxml/xpath.h>
12675#endif
12676
Daniel Veillardffa3c742005-07-21 13:24:09 +000012677extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012678static int xmlParserInitialized = 0;
12679
12680/**
12681 * xmlInitParser:
12682 *
12683 * Initialization function for the XML parser.
12684 * This is not reentrant. Call once before processing in case of
12685 * use in multithreaded programs.
12686 */
12687
12688void
12689xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012690 if (xmlParserInitialized != 0)
12691 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012692
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012693 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12694 (xmlGenericError == NULL))
12695 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012696 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012697 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012698 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012699 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012700 xmlDefaultSAXHandlerInit();
12701 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012702#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012703 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012704#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012705#ifdef LIBXML_HTML_ENABLED
12706 htmlInitAutoClose();
12707 htmlDefaultSAXHandlerInit();
12708#endif
12709#ifdef LIBXML_XPATH_ENABLED
12710 xmlXPathInit();
12711#endif
12712 xmlParserInitialized = 1;
12713}
12714
12715/**
12716 * xmlCleanupParser:
12717 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012718 * Cleanup function for the XML library. It tries to reclaim all
12719 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012720 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012721 * function should not prevent reusing the library but one should
12722 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012723 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012724 */
12725
12726void
12727xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012728 if (!xmlParserInitialized)
12729 return;
12730
Owen Taylor3473f882001-02-23 17:55:21 +000012731 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012732#ifdef LIBXML_CATALOG_ENABLED
12733 xmlCatalogCleanup();
12734#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012735 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012736 xmlCleanupInputCallbacks();
12737#ifdef LIBXML_OUTPUT_ENABLED
12738 xmlCleanupOutputCallbacks();
12739#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012740#ifdef LIBXML_SCHEMAS_ENABLED
12741 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012742 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012743#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012744 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012745 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012746 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012747 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012748 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012749}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012750
12751/************************************************************************
12752 * *
12753 * New set (2.6.0) of simpler and more flexible APIs *
12754 * *
12755 ************************************************************************/
12756
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL @str is left alone.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, including when used as the body of an if/else.
 * It must be followed by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
12768
12769/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012770 * xmlCtxtReset:
12771 * @ctxt: an XML parser context
12772 *
12773 * Reset a parser context
12774 */
12775void
12776xmlCtxtReset(xmlParserCtxtPtr ctxt)
12777{
12778 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012779 xmlDictPtr dict;
12780
12781 if (ctxt == NULL)
12782 return;
12783
12784 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012785
12786 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12787 xmlFreeInputStream(input);
12788 }
12789 ctxt->inputNr = 0;
12790 ctxt->input = NULL;
12791
12792 ctxt->spaceNr = 0;
12793 ctxt->spaceTab[0] = -1;
12794 ctxt->space = &ctxt->spaceTab[0];
12795
12796
12797 ctxt->nodeNr = 0;
12798 ctxt->node = NULL;
12799
12800 ctxt->nameNr = 0;
12801 ctxt->name = NULL;
12802
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012803 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012804 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012805 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012806 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012807 DICT_FREE(ctxt->directory);
12808 ctxt->directory = NULL;
12809 DICT_FREE(ctxt->extSubURI);
12810 ctxt->extSubURI = NULL;
12811 DICT_FREE(ctxt->extSubSystem);
12812 ctxt->extSubSystem = NULL;
12813 if (ctxt->myDoc != NULL)
12814 xmlFreeDoc(ctxt->myDoc);
12815 ctxt->myDoc = NULL;
12816
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012817 ctxt->standalone = -1;
12818 ctxt->hasExternalSubset = 0;
12819 ctxt->hasPErefs = 0;
12820 ctxt->html = 0;
12821 ctxt->external = 0;
12822 ctxt->instate = XML_PARSER_START;
12823 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012824
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012825 ctxt->wellFormed = 1;
12826 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012827 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012828 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012829#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012830 ctxt->vctxt.userData = ctxt;
12831 ctxt->vctxt.error = xmlParserValidityError;
12832 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012833#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012834 ctxt->record_info = 0;
12835 ctxt->nbChars = 0;
12836 ctxt->checkIndex = 0;
12837 ctxt->inSubset = 0;
12838 ctxt->errNo = XML_ERR_OK;
12839 ctxt->depth = 0;
12840 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12841 ctxt->catalogs = NULL;
12842 xmlInitNodeInfoSeq(&ctxt->node_seq);
12843
12844 if (ctxt->attsDefault != NULL) {
12845 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12846 ctxt->attsDefault = NULL;
12847 }
12848 if (ctxt->attsSpecial != NULL) {
12849 xmlHashFree(ctxt->attsSpecial, NULL);
12850 ctxt->attsSpecial = NULL;
12851 }
12852
Daniel Veillard4432df22003-09-28 18:58:27 +000012853#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012854 if (ctxt->catalogs != NULL)
12855 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012856#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012857 if (ctxt->lastError.code != XML_ERR_OK)
12858 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012859}
12860
12861/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012862 * xmlCtxtResetPush:
12863 * @ctxt: an XML parser context
12864 * @chunk: a pointer to an array of chars
12865 * @size: number of chars in the array
12866 * @filename: an optional file name or URI
12867 * @encoding: the document encoding, or NULL
12868 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012869 * Reset a push parser context
12870 *
12871 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012872 */
12873int
12874xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12875 int size, const char *filename, const char *encoding)
12876{
12877 xmlParserInputPtr inputStream;
12878 xmlParserInputBufferPtr buf;
12879 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12880
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012881 if (ctxt == NULL)
12882 return(1);
12883
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012884 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12885 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12886
12887 buf = xmlAllocParserInputBuffer(enc);
12888 if (buf == NULL)
12889 return(1);
12890
12891 if (ctxt == NULL) {
12892 xmlFreeParserInputBuffer(buf);
12893 return(1);
12894 }
12895
12896 xmlCtxtReset(ctxt);
12897
12898 if (ctxt->pushTab == NULL) {
12899 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12900 sizeof(xmlChar *));
12901 if (ctxt->pushTab == NULL) {
12902 xmlErrMemory(ctxt, NULL);
12903 xmlFreeParserInputBuffer(buf);
12904 return(1);
12905 }
12906 }
12907
12908 if (filename == NULL) {
12909 ctxt->directory = NULL;
12910 } else {
12911 ctxt->directory = xmlParserGetDirectory(filename);
12912 }
12913
12914 inputStream = xmlNewInputStream(ctxt);
12915 if (inputStream == NULL) {
12916 xmlFreeParserInputBuffer(buf);
12917 return(1);
12918 }
12919
12920 if (filename == NULL)
12921 inputStream->filename = NULL;
12922 else
12923 inputStream->filename = (char *)
12924 xmlCanonicPath((const xmlChar *) filename);
12925 inputStream->buf = buf;
12926 inputStream->base = inputStream->buf->buffer->content;
12927 inputStream->cur = inputStream->buf->buffer->content;
12928 inputStream->end =
12929 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12930
12931 inputPush(ctxt, inputStream);
12932
12933 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12934 (ctxt->input->buf != NULL)) {
12935 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12936 int cur = ctxt->input->cur - ctxt->input->base;
12937
12938 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12939
12940 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12941 ctxt->input->cur = ctxt->input->base + cur;
12942 ctxt->input->end =
12943 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12944 use];
12945#ifdef DEBUG_PUSH
12946 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12947#endif
12948 }
12949
12950 if (encoding != NULL) {
12951 xmlCharEncodingHandlerPtr hdlr;
12952
12953 hdlr = xmlFindCharEncodingHandler(encoding);
12954 if (hdlr != NULL) {
12955 xmlSwitchToEncoding(ctxt, hdlr);
12956 } else {
12957 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12958 "Unsupported encoding %s\n", BAD_CAST encoding);
12959 }
12960 } else if (enc != XML_CHAR_ENCODING_NONE) {
12961 xmlSwitchEncoding(ctxt, enc);
12962 }
12963
12964 return(0);
12965}
12966
12967/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012968 * xmlCtxtUseOptions:
12969 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012970 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012971 *
12972 * Applies the options to the parser context
12973 *
12974 * Returns 0 in case of success, the set of unknown or unimplemented options
12975 * in case of error.
12976 */
12977int
12978xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12979{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012980 if (ctxt == NULL)
12981 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012982 if (options & XML_PARSE_RECOVER) {
12983 ctxt->recovery = 1;
12984 options -= XML_PARSE_RECOVER;
12985 } else
12986 ctxt->recovery = 0;
12987 if (options & XML_PARSE_DTDLOAD) {
12988 ctxt->loadsubset = XML_DETECT_IDS;
12989 options -= XML_PARSE_DTDLOAD;
12990 } else
12991 ctxt->loadsubset = 0;
12992 if (options & XML_PARSE_DTDATTR) {
12993 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12994 options -= XML_PARSE_DTDATTR;
12995 }
12996 if (options & XML_PARSE_NOENT) {
12997 ctxt->replaceEntities = 1;
12998 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12999 options -= XML_PARSE_NOENT;
13000 } else
13001 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013002 if (options & XML_PARSE_PEDANTIC) {
13003 ctxt->pedantic = 1;
13004 options -= XML_PARSE_PEDANTIC;
13005 } else
13006 ctxt->pedantic = 0;
13007 if (options & XML_PARSE_NOBLANKS) {
13008 ctxt->keepBlanks = 0;
13009 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13010 options -= XML_PARSE_NOBLANKS;
13011 } else
13012 ctxt->keepBlanks = 1;
13013 if (options & XML_PARSE_DTDVALID) {
13014 ctxt->validate = 1;
13015 if (options & XML_PARSE_NOWARNING)
13016 ctxt->vctxt.warning = NULL;
13017 if (options & XML_PARSE_NOERROR)
13018 ctxt->vctxt.error = NULL;
13019 options -= XML_PARSE_DTDVALID;
13020 } else
13021 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013022 if (options & XML_PARSE_NOWARNING) {
13023 ctxt->sax->warning = NULL;
13024 options -= XML_PARSE_NOWARNING;
13025 }
13026 if (options & XML_PARSE_NOERROR) {
13027 ctxt->sax->error = NULL;
13028 ctxt->sax->fatalError = NULL;
13029 options -= XML_PARSE_NOERROR;
13030 }
Daniel Veillard81273902003-09-30 00:43:48 +000013031#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013032 if (options & XML_PARSE_SAX1) {
13033 ctxt->sax->startElement = xmlSAX2StartElement;
13034 ctxt->sax->endElement = xmlSAX2EndElement;
13035 ctxt->sax->startElementNs = NULL;
13036 ctxt->sax->endElementNs = NULL;
13037 ctxt->sax->initialized = 1;
13038 options -= XML_PARSE_SAX1;
13039 }
Daniel Veillard81273902003-09-30 00:43:48 +000013040#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013041 if (options & XML_PARSE_NODICT) {
13042 ctxt->dictNames = 0;
13043 options -= XML_PARSE_NODICT;
13044 } else {
13045 ctxt->dictNames = 1;
13046 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013047 if (options & XML_PARSE_NOCDATA) {
13048 ctxt->sax->cdataBlock = NULL;
13049 options -= XML_PARSE_NOCDATA;
13050 }
13051 if (options & XML_PARSE_NSCLEAN) {
13052 ctxt->options |= XML_PARSE_NSCLEAN;
13053 options -= XML_PARSE_NSCLEAN;
13054 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013055 if (options & XML_PARSE_NONET) {
13056 ctxt->options |= XML_PARSE_NONET;
13057 options -= XML_PARSE_NONET;
13058 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013059 if (options & XML_PARSE_COMPACT) {
13060 ctxt->options |= XML_PARSE_COMPACT;
13061 options -= XML_PARSE_COMPACT;
13062 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013063 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013064 return (options);
13065}
13066
13067/**
13068 * xmlDoRead:
13069 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013070 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013071 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013072 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013073 * @reuse: keep the context for reuse
13074 *
13075 * Common front-end for the xmlRead functions
13076 *
13077 * Returns the resulting document tree or NULL
13078 */
13079static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013080xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13081 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013082{
13083 xmlDocPtr ret;
13084
13085 xmlCtxtUseOptions(ctxt, options);
13086 if (encoding != NULL) {
13087 xmlCharEncodingHandlerPtr hdlr;
13088
13089 hdlr = xmlFindCharEncodingHandler(encoding);
13090 if (hdlr != NULL)
13091 xmlSwitchToEncoding(ctxt, hdlr);
13092 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013093 if ((URL != NULL) && (ctxt->input != NULL) &&
13094 (ctxt->input->filename == NULL))
13095 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013096 xmlParseDocument(ctxt);
13097 if ((ctxt->wellFormed) || ctxt->recovery)
13098 ret = ctxt->myDoc;
13099 else {
13100 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013101 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013102 xmlFreeDoc(ctxt->myDoc);
13103 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013104 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013105 ctxt->myDoc = NULL;
13106 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013107 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013108 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013109
13110 return (ret);
13111}
13112
13113/**
13114 * xmlReadDoc:
13115 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013116 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013117 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013118 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013119 *
13120 * parse an XML in-memory document and build a tree.
13121 *
13122 * Returns the resulting document tree
13123 */
13124xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013125xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013126{
13127 xmlParserCtxtPtr ctxt;
13128
13129 if (cur == NULL)
13130 return (NULL);
13131
13132 ctxt = xmlCreateDocParserCtxt(cur);
13133 if (ctxt == NULL)
13134 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013135 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013136}
13137
13138/**
13139 * xmlReadFile:
13140 * @filename: a file or URL
13141 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013142 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013143 *
13144 * parse an XML file from the filesystem or the network.
13145 *
13146 * Returns the resulting document tree
13147 */
13148xmlDocPtr
13149xmlReadFile(const char *filename, const char *encoding, int options)
13150{
13151 xmlParserCtxtPtr ctxt;
13152
Daniel Veillard61b93382003-11-03 14:28:31 +000013153 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013154 if (ctxt == NULL)
13155 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013156 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013157}
13158
13159/**
13160 * xmlReadMemory:
13161 * @buffer: a pointer to a char array
13162 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013163 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013164 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013165 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013166 *
13167 * parse an XML in-memory document and build a tree.
13168 *
13169 * Returns the resulting document tree
13170 */
13171xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013172xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013173{
13174 xmlParserCtxtPtr ctxt;
13175
13176 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13177 if (ctxt == NULL)
13178 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013179 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013180}
13181
13182/**
13183 * xmlReadFd:
13184 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013185 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013186 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013187 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013188 *
13189 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013190 * NOTE that the file descriptor will not be closed when the
13191 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013192 *
13193 * Returns the resulting document tree
13194 */
13195xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013196xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013197{
13198 xmlParserCtxtPtr ctxt;
13199 xmlParserInputBufferPtr input;
13200 xmlParserInputPtr stream;
13201
13202 if (fd < 0)
13203 return (NULL);
13204
13205 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13206 if (input == NULL)
13207 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013208 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013209 ctxt = xmlNewParserCtxt();
13210 if (ctxt == NULL) {
13211 xmlFreeParserInputBuffer(input);
13212 return (NULL);
13213 }
13214 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13215 if (stream == NULL) {
13216 xmlFreeParserInputBuffer(input);
13217 xmlFreeParserCtxt(ctxt);
13218 return (NULL);
13219 }
13220 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013221 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013222}
13223
13224/**
13225 * xmlReadIO:
13226 * @ioread: an I/O read function
13227 * @ioclose: an I/O close function
13228 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013229 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013230 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013231 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013232 *
13233 * parse an XML document from I/O functions and source and build a tree.
13234 *
13235 * Returns the resulting document tree
13236 */
13237xmlDocPtr
13238xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013239 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013240{
13241 xmlParserCtxtPtr ctxt;
13242 xmlParserInputBufferPtr input;
13243 xmlParserInputPtr stream;
13244
13245 if (ioread == NULL)
13246 return (NULL);
13247
13248 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13249 XML_CHAR_ENCODING_NONE);
13250 if (input == NULL)
13251 return (NULL);
13252 ctxt = xmlNewParserCtxt();
13253 if (ctxt == NULL) {
13254 xmlFreeParserInputBuffer(input);
13255 return (NULL);
13256 }
13257 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13258 if (stream == NULL) {
13259 xmlFreeParserInputBuffer(input);
13260 xmlFreeParserCtxt(ctxt);
13261 return (NULL);
13262 }
13263 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013264 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013265}
13266
13267/**
13268 * xmlCtxtReadDoc:
13269 * @ctxt: an XML parser context
13270 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013271 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013272 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013273 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013274 *
13275 * parse an XML in-memory document and build a tree.
13276 * This reuses the existing @ctxt parser context
13277 *
13278 * Returns the resulting document tree
13279 */
13280xmlDocPtr
13281xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013282 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013283{
13284 xmlParserInputPtr stream;
13285
13286 if (cur == NULL)
13287 return (NULL);
13288 if (ctxt == NULL)
13289 return (NULL);
13290
13291 xmlCtxtReset(ctxt);
13292
13293 stream = xmlNewStringInputStream(ctxt, cur);
13294 if (stream == NULL) {
13295 return (NULL);
13296 }
13297 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013298 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013299}
13300
13301/**
13302 * xmlCtxtReadFile:
13303 * @ctxt: an XML parser context
13304 * @filename: a file or URL
13305 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013306 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013307 *
13308 * parse an XML file from the filesystem or the network.
13309 * This reuses the existing @ctxt parser context
13310 *
13311 * Returns the resulting document tree
13312 */
13313xmlDocPtr
13314xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13315 const char *encoding, int options)
13316{
13317 xmlParserInputPtr stream;
13318
13319 if (filename == NULL)
13320 return (NULL);
13321 if (ctxt == NULL)
13322 return (NULL);
13323
13324 xmlCtxtReset(ctxt);
13325
Daniel Veillard29614c72004-11-26 10:47:26 +000013326 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327 if (stream == NULL) {
13328 return (NULL);
13329 }
13330 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013331 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013332}
13333
13334/**
13335 * xmlCtxtReadMemory:
13336 * @ctxt: an XML parser context
13337 * @buffer: a pointer to a char array
13338 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013339 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013340 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013341 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013342 *
13343 * parse an XML in-memory document and build a tree.
13344 * This reuses the existing @ctxt parser context
13345 *
13346 * Returns the resulting document tree
13347 */
13348xmlDocPtr
13349xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013350 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013351{
13352 xmlParserInputBufferPtr input;
13353 xmlParserInputPtr stream;
13354
13355 if (ctxt == NULL)
13356 return (NULL);
13357 if (buffer == NULL)
13358 return (NULL);
13359
13360 xmlCtxtReset(ctxt);
13361
13362 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13363 if (input == NULL) {
13364 return(NULL);
13365 }
13366
13367 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13368 if (stream == NULL) {
13369 xmlFreeParserInputBuffer(input);
13370 return(NULL);
13371 }
13372
13373 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013374 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013375}
13376
13377/**
13378 * xmlCtxtReadFd:
13379 * @ctxt: an XML parser context
13380 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013381 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013382 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013383 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013384 *
13385 * parse an XML from a file descriptor and build a tree.
13386 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013387 * NOTE that the file descriptor will not be closed when the
13388 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013389 *
13390 * Returns the resulting document tree
13391 */
13392xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013393xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13394 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013395{
13396 xmlParserInputBufferPtr input;
13397 xmlParserInputPtr stream;
13398
13399 if (fd < 0)
13400 return (NULL);
13401 if (ctxt == NULL)
13402 return (NULL);
13403
13404 xmlCtxtReset(ctxt);
13405
13406
13407 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13408 if (input == NULL)
13409 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013410 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013411 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13412 if (stream == NULL) {
13413 xmlFreeParserInputBuffer(input);
13414 return (NULL);
13415 }
13416 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013417 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013418}
13419
13420/**
13421 * xmlCtxtReadIO:
13422 * @ctxt: an XML parser context
13423 * @ioread: an I/O read function
13424 * @ioclose: an I/O close function
13425 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013426 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013427 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013428 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013429 *
13430 * parse an XML document from I/O functions and source and build a tree.
13431 * This reuses the existing @ctxt parser context
13432 *
13433 * Returns the resulting document tree
13434 */
13435xmlDocPtr
13436xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13437 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013438 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013439 const char *encoding, int options)
13440{
13441 xmlParserInputBufferPtr input;
13442 xmlParserInputPtr stream;
13443
13444 if (ioread == NULL)
13445 return (NULL);
13446 if (ctxt == NULL)
13447 return (NULL);
13448
13449 xmlCtxtReset(ctxt);
13450
13451 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13452 XML_CHAR_ENCODING_NONE);
13453 if (input == NULL)
13454 return (NULL);
13455 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13456 if (stream == NULL) {
13457 xmlFreeParserInputBuffer(input);
13458 return (NULL);
13459 }
13460 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013461 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013462}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013463
13464#define bottom_parser
13465#include "elfgcchack.h"