blob: 9aa9698c99a0c3c7ca794537800db87699575ab1 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. The default value is 1024; the variable has
 * external linkage and is not const.
 */
unsigned int xmlParserMaxDepth = 1024;

/* NOTE(review): presumably marks this file as built for SAX2 - confirm use */
#define SAX2 1

/*
 * Buffer sizes, in bytes judging by the names; their users are not in
 * this part of the file - TODO confirm the unit against the callers.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string, cast to xmlChar * through BAD_CAST. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry so it can be walked without
 * a separate length.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
/*
 * Forward declarations.
 * The static functions below must be defined later in this translation
 * unit; xmlParseStringPEReference has external linkage.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

/* Only declared when the legacy API is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
		      const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000413 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000414 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000415 schannel = ctxt->sax->serror;
416 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000417 (ctxt->sax) ? ctxt->sax->warning : NULL,
418 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000419 ctxt, NULL, XML_FROM_PARSER, error,
420 XML_ERR_WARNING, NULL, 0,
421 (const char *) str1, (const char *) str2, NULL, 0, 0,
422 msg, (const char *) str1, (const char *) str2);
423}
424
425/**
426 * xmlValidityError:
427 * @ctxt: an XML parser context
428 * @error: the error number
429 * @msg: the error message
430 * @str1: extra data
431 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000432 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000433 */
434static void
435xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
436 const char *msg, const xmlChar *str1)
437{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000438 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000439
440 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
441 (ctxt->instate == XML_PARSER_EOF))
442 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000443 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000444 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000445 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000446 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000447 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000448 ctxt, NULL, XML_FROM_DTD, error,
449 XML_ERR_ERROR, NULL, 0, (const char *) str1,
450 NULL, NULL, 0, 0,
451 msg, (const char *) str1);
452 ctxt->valid = 0;
453}
454
455/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 * xmlFatalErrMsgInt:
457 * @ctxt: an XML parser context
458 * @error: the error number
459 * @msg: the error message
460 * @val: an integer value
461 *
462 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
463 */
464static void
465xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000467{
Daniel Veillard157fee02003-10-31 10:36:03 +0000468 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
469 (ctxt->instate == XML_PARSER_EOF))
470 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000471 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000472 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
474 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000475 ctxt->wellFormed = 0;
476 if (ctxt->recovery == 0)
477 ctxt->disableSAX = 1;
478}
479
480/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000481 * xmlFatalErrMsgStrIntStr:
482 * @ctxt: an XML parser context
483 * @error: the error number
484 * @msg: the error message
485 * @str1: an string info
486 * @val: an integer value
487 * @str2: an string info
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491static void
492xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg, const xmlChar *str1, int val,
494 const xmlChar *str2)
495{
Daniel Veillard157fee02003-10-31 10:36:03 +0000496 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
497 (ctxt->instate == XML_PARSER_EOF))
498 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000499 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000500 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000501 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
502 NULL, 0, (const char *) str1, (const char *) str2,
503 NULL, val, 0, msg, str1, val, str2);
504 ctxt->wellFormed = 0;
505 if (ctxt->recovery == 0)
506 ctxt->disableSAX = 1;
507}
508
509/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000510 * xmlFatalErrMsgStr:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @val: a string value
515 *
516 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
517 */
518static void
519xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000520 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000521{
Daniel Veillard157fee02003-10-31 10:36:03 +0000522 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
523 (ctxt->instate == XML_PARSER_EOF))
524 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000525 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000526 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 XML_FROM_PARSER, error, XML_ERR_FATAL,
528 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
529 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000530 ctxt->wellFormed = 0;
531 if (ctxt->recovery == 0)
532 ctxt->disableSAX = 1;
533}
534
535/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000536 * xmlErrMsgStr:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @val: a string value
541 *
542 * Handle a non fatal parser error
543 */
544static void
545xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
546 const char *msg, const xmlChar * val)
547{
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000551 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000553 XML_FROM_PARSER, error, XML_ERR_ERROR,
554 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
555 val);
556}
557
558/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000559 * xmlNsErr:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the message
563 * @info1: extra information string
564 * @info2: extra information string
565 *
566 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
567 */
568static void
569xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
570 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000571 const xmlChar * info1, const xmlChar * info2,
572 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000573{
Daniel Veillard157fee02003-10-31 10:36:03 +0000574 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
575 (ctxt->instate == XML_PARSER_EOF))
576 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000579 XML_ERR_ERROR, NULL, 0, (const char *) info1,
580 (const char *) info2, (const char *) info3, 0, 0, msg,
581 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 ctxt->nsWellFormed = 0;
583}
584
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000585/************************************************************************
586 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000587 * SAX2 defaulted attributes handling *
588 * *
589 ************************************************************************/
590
591/**
592 * xmlDetectSAX2:
593 * @ctxt: an XML parser context
594 *
595 * Do the SAX2 detection and specific intialization
596 */
597static void
598xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
599 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000600#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000601 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
602 ((ctxt->sax->startElementNs != NULL) ||
603 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000604#else
605 ctxt->sax2 = 1;
606#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000607
608 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
609 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
610 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000611 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
612 (ctxt->str_xml_ns == NULL)) {
613 xmlErrMemory(ctxt, NULL);
614 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000615}
616
typedef struct _xmlDefAttrs xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
/*
 * Defaulted attributes recorded for one element.
 *
 * The structure is allocated with extra room after it, so values[] is
 * indexed past its declared 4 entries. Each attribute uses 4 consecutive
 * slots: localname, prefix, value, and a fourth slot.
 * NOTE(review): the fourth slot presumably holds the end of the value
 * string - confirm against xmlAddDefAttrs.
 */
struct _xmlDefAttrs {
    int nbAttrs;	/* number of defaulted attributes on that element */
    int maxAttrs;       /* the size of the array */
    const xmlChar *values[4]; /* array of localname/prefix/values */
};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000624
625/**
626 * xmlAddDefAttrs:
627 * @ctxt: an XML parser context
628 * @fullname: the element fullname
629 * @fullattr: the attribute fullname
630 * @value: the attribute value
631 *
632 * Add a defaulted attribute for an element
633 */
634static void
635xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
636 const xmlChar *fullname,
637 const xmlChar *fullattr,
638 const xmlChar *value) {
639 xmlDefAttrsPtr defaults;
640 int len;
641 const xmlChar *name;
642 const xmlChar *prefix;
643
644 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000645 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000646 if (ctxt->attsDefault == NULL)
647 goto mem_error;
648 }
649
650 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000651 * split the element name into prefix:localname , the string found
652 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000653 */
654 name = xmlSplitQName3(fullname, &len);
655 if (name == NULL) {
656 name = xmlDictLookup(ctxt->dict, fullname, -1);
657 prefix = NULL;
658 } else {
659 name = xmlDictLookup(ctxt->dict, name, -1);
660 prefix = xmlDictLookup(ctxt->dict, fullname, len);
661 }
662
663 /*
664 * make sure there is some storage
665 */
666 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
667 if (defaults == NULL) {
668 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000669 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000670 if (defaults == NULL)
671 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000672 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000676 xmlDefAttrsPtr temp;
677
678 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000679 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000680 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000681 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000682 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000683 defaults->maxAttrs *= 2;
684 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
685 }
686
687 /*
688 * plit the element name into prefix:localname , the string found
689 * are within the DTD and hen not associated to namespace names.
690 */
691 name = xmlSplitQName3(fullattr, &len);
692 if (name == NULL) {
693 name = xmlDictLookup(ctxt->dict, fullattr, -1);
694 prefix = NULL;
695 } else {
696 name = xmlDictLookup(ctxt->dict, name, -1);
697 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
698 }
699
700 defaults->values[4 * defaults->nbAttrs] = name;
701 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
702 /* intern the string and precompute the end */
703 len = xmlStrlen(value);
704 value = xmlDictLookup(ctxt->dict, value, len);
705 defaults->values[4 * defaults->nbAttrs + 2] = value;
706 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
707 defaults->nbAttrs++;
708
709 return;
710
711mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000712 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000713 return;
714}
715
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000716/**
717 * xmlAddSpecialAttr:
718 * @ctxt: an XML parser context
719 * @fullname: the element fullname
720 * @fullattr: the attribute fullname
721 * @type: the attribute type
722 *
723 * Register that this attribute is not CDATA
724 */
725static void
726xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
727 const xmlChar *fullname,
728 const xmlChar *fullattr,
729 int type)
730{
731 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000732 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000733 if (ctxt->attsSpecial == NULL)
734 goto mem_error;
735 }
736
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000737 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
738 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000739 return;
740
741mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000742 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000743 return;
744}
745
Daniel Veillard4432df22003-09-28 18:58:27 +0000746/**
747 * xmlCheckLanguageID:
748 * @lang: pointer to the string value
749 *
750 * Checks that the value conforms to the LanguageID production:
751 *
752 * NOTE: this is somewhat deprecated, those productions were removed from
753 * the XML Second edition.
754 *
755 * [33] LanguageID ::= Langcode ('-' Subcode)*
756 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
757 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
758 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
759 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
760 * [38] Subcode ::= ([a-z] | [A-Z])+
761 *
762 * Returns 1 if correct 0 otherwise
763 **/
764int
765xmlCheckLanguageID(const xmlChar * lang)
766{
767 const xmlChar *cur = lang;
768
769 if (cur == NULL)
770 return (0);
771 if (((cur[0] == 'i') && (cur[1] == '-')) ||
772 ((cur[0] == 'I') && (cur[1] == '-'))) {
773 /*
774 * IANA code
775 */
776 cur += 2;
777 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
778 ((cur[0] >= 'a') && (cur[0] <= 'z')))
779 cur++;
780 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
781 ((cur[0] == 'X') && (cur[1] == '-'))) {
782 /*
783 * User code
784 */
785 cur += 2;
786 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
787 ((cur[0] >= 'a') && (cur[0] <= 'z')))
788 cur++;
789 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
790 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
791 /*
792 * ISO639
793 */
794 cur++;
795 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
796 ((cur[0] >= 'a') && (cur[0] <= 'z')))
797 cur++;
798 else
799 return (0);
800 } else
801 return (0);
802 while (cur[0] != 0) { /* non input consuming */
803 if (cur[0] != '-')
804 return (0);
805 cur++;
806 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
807 ((cur[0] >= 'a') && (cur[0] <= 'z')))
808 cur++;
809 else
810 return (0);
811 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
812 ((cur[0] >= 'a') && (cur[0] <= 'z')))
813 cur++;
814 }
815 return (1);
816}
817
Owen Taylor3473f882001-02-23 17:55:21 +0000818/************************************************************************
819 * *
820 * Parser stacks related functions and macros *
821 * *
822 ************************************************************************/
823
824xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
825 const xmlChar ** str);
826
Daniel Veillard0fb18932003-09-07 09:14:37 +0000827#ifdef SAX2
828/**
829 * nsPush:
830 * @ctxt: an XML parser context
831 * @prefix: the namespace prefix or NULL
832 * @URL: the namespace name
833 *
834 * Pushes a new parser namespace on top of the ns stack
835 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000836 * Returns -1 in case of error, -2 if the namespace should be discarded
837 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000838 */
839static int
840nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
841{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000842 if (ctxt->options & XML_PARSE_NSCLEAN) {
843 int i;
844 for (i = 0;i < ctxt->nsNr;i += 2) {
845 if (ctxt->nsTab[i] == prefix) {
846 /* in scope */
847 if (ctxt->nsTab[i + 1] == URL)
848 return(-2);
849 /* out of scope keep it */
850 break;
851 }
852 }
853 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000854 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
855 ctxt->nsMax = 10;
856 ctxt->nsNr = 0;
857 ctxt->nsTab = (const xmlChar **)
858 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax = 0;
862 return (-1);
863 }
864 } else if (ctxt->nsNr >= ctxt->nsMax) {
865 ctxt->nsMax *= 2;
866 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000867 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
869 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000870 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000871 ctxt->nsMax /= 2;
872 return (-1);
873 }
874 }
875 ctxt->nsTab[ctxt->nsNr++] = prefix;
876 ctxt->nsTab[ctxt->nsNr++] = URL;
877 return (ctxt->nsNr);
878}
879/**
880 * nsPop:
881 * @ctxt: an XML parser context
882 * @nr: the number to pop
883 *
884 * Pops the top @nr parser prefix/namespace from the ns stack
885 *
886 * Returns the number of namespaces removed
887 */
888static int
889nsPop(xmlParserCtxtPtr ctxt, int nr)
890{
891 int i;
892
893 if (ctxt->nsTab == NULL) return(0);
894 if (ctxt->nsNr < nr) {
895 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
896 nr = ctxt->nsNr;
897 }
898 if (ctxt->nsNr <= 0)
899 return (0);
900
901 for (i = 0;i < nr;i++) {
902 ctxt->nsNr--;
903 ctxt->nsTab[ctxt->nsNr] = NULL;
904 }
905 return(nr);
906}
907#endif
908
909static int
910xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
911 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000912 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000913 int maxatts;
914
915 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **)
918 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
922 if (attallocs == NULL) goto mem_error;
923 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000925 } else if (nr + 5 > ctxt->maxatts) {
926 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
928 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000929 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000930 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000931 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
932 (maxatts / 5) * sizeof(int));
933 if (attallocs == NULL) goto mem_error;
934 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000935 ctxt->maxatts = maxatts;
936 }
937 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000938mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000939 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000940 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000941}
942
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000943/**
944 * inputPush:
945 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000947 *
948 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000949 *
950 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000951 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000952int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
954{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000955 if ((ctxt == NULL) || (value == NULL))
956 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000957 if (ctxt->inputNr >= ctxt->inputMax) {
958 ctxt->inputMax *= 2;
959 ctxt->inputTab =
960 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
961 ctxt->inputMax *
962 sizeof(ctxt->inputTab[0]));
963 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000964 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000965 return (0);
966 }
967 }
968 ctxt->inputTab[ctxt->inputNr] = value;
969 ctxt->input = value;
970 return (ctxt->inputNr++);
971}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000972/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 * @ctxt: an XML parser context
975 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000976 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000977 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000979 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000980xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000981inputPop(xmlParserCtxtPtr ctxt)
982{
983 xmlParserInputPtr ret;
984
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000985 if (ctxt == NULL)
986 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000987 if (ctxt->inputNr <= 0)
988 return (0);
989 ctxt->inputNr--;
990 if (ctxt->inputNr > 0)
991 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
992 else
993 ctxt->input = NULL;
994 ret = ctxt->inputTab[ctxt->inputNr];
995 ctxt->inputTab[ctxt->inputNr] = 0;
996 return (ret);
997}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000998/**
999 * nodePush:
1000 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001001 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001002 *
1003 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001004 *
1005 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001006 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001007int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001008nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1009{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001010 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001011 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001012 xmlNodePtr *tmp;
1013
1014 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1015 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001016 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001017 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001018 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001019 return (0);
1020 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001021 ctxt->nodeTab = tmp;
1022 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001023 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001024 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001025 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001026 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1027 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001029 return(0);
1030 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001031 ctxt->nodeTab[ctxt->nodeNr] = value;
1032 ctxt->node = value;
1033 return (ctxt->nodeNr++);
1034}
1035/**
1036 * nodePop:
1037 * @ctxt: an XML parser context
1038 *
1039 * Pops the top element node from the node stack
1040 *
1041 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001042 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001043xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001044nodePop(xmlParserCtxtPtr ctxt)
1045{
1046 xmlNodePtr ret;
1047
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001048 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001049 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001050 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001051 ctxt->nodeNr--;
1052 if (ctxt->nodeNr > 0)
1053 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1054 else
1055 ctxt->node = NULL;
1056 ret = ctxt->nodeTab[ctxt->nodeNr];
1057 ctxt->nodeTab[ctxt->nodeNr] = 0;
1058 return (ret);
1059}
Daniel Veillarda2351322004-06-27 12:08:10 +00001060
1061#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001062/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001063 * nameNsPush:
1064 * @ctxt: an XML parser context
1065 * @value: the element name
1066 * @prefix: the element prefix
1067 * @URI: the element namespace name
1068 *
1069 * Pushes a new element name/prefix/URL on top of the name stack
1070 *
1071 * Returns -1 in case of error, the index in the stack otherwise
1072 */
1073static int
1074nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1075 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1076{
1077 if (ctxt->nameNr >= ctxt->nameMax) {
1078 const xmlChar * *tmp;
1079 void **tmp2;
1080 ctxt->nameMax *= 2;
1081 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1082 ctxt->nameMax *
1083 sizeof(ctxt->nameTab[0]));
1084 if (tmp == NULL) {
1085 ctxt->nameMax /= 2;
1086 goto mem_error;
1087 }
1088 ctxt->nameTab = tmp;
1089 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1090 ctxt->nameMax * 3 *
1091 sizeof(ctxt->pushTab[0]));
1092 if (tmp2 == NULL) {
1093 ctxt->nameMax /= 2;
1094 goto mem_error;
1095 }
1096 ctxt->pushTab = tmp2;
1097 }
1098 ctxt->nameTab[ctxt->nameNr] = value;
1099 ctxt->name = value;
1100 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1101 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001102 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 return (ctxt->nameNr++);
1104mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001105 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001106 return (-1);
1107}
1108/**
1109 * nameNsPop:
1110 * @ctxt: an XML parser context
1111 *
1112 * Pops the top element/prefix/URI name from the name stack
1113 *
1114 * Returns the name just removed
1115 */
1116static const xmlChar *
1117nameNsPop(xmlParserCtxtPtr ctxt)
1118{
1119 const xmlChar *ret;
1120
1121 if (ctxt->nameNr <= 0)
1122 return (0);
1123 ctxt->nameNr--;
1124 if (ctxt->nameNr > 0)
1125 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1126 else
1127 ctxt->name = NULL;
1128 ret = ctxt->nameTab[ctxt->nameNr];
1129 ctxt->nameTab[ctxt->nameNr] = NULL;
1130 return (ret);
1131}
Daniel Veillarda2351322004-06-27 12:08:10 +00001132#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001133
1134/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001135 * namePush:
1136 * @ctxt: an XML parser context
1137 * @value: the element name
1138 *
1139 * Pushes a new element name on top of the name stack
1140 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001142 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001143int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001144namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001145{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001146 if (ctxt == NULL) return (-1);
1147
Daniel Veillard1c732d22002-11-30 11:22:59 +00001148 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001150 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001151 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001152 ctxt->nameMax *
1153 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 if (tmp == NULL) {
1155 ctxt->nameMax /= 2;
1156 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001157 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001159 }
1160 ctxt->nameTab[ctxt->nameNr] = value;
1161 ctxt->name = value;
1162 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001164 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001165 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001166}
1167/**
1168 * namePop:
1169 * @ctxt: an XML parser context
1170 *
1171 * Pops the top element name from the name stack
1172 *
1173 * Returns the name just removed
1174 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001175const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001176namePop(xmlParserCtxtPtr ctxt)
1177{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001178 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001179
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001180 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1181 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001182 ctxt->nameNr--;
1183 if (ctxt->nameNr > 0)
1184 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1185 else
1186 ctxt->name = NULL;
1187 ret = ctxt->nameTab[ctxt->nameNr];
1188 ctxt->nameTab[ctxt->nameNr] = 0;
1189 return (ret);
1190}
Owen Taylor3473f882001-02-23 17:55:21 +00001191
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001192static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001193 if (ctxt->spaceNr >= ctxt->spaceMax) {
1194 ctxt->spaceMax *= 2;
1195 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1196 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1197 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001198 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001199 return(0);
1200 }
1201 }
1202 ctxt->spaceTab[ctxt->spaceNr] = val;
1203 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1204 return(ctxt->spaceNr++);
1205}
1206
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001207static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001208 int ret;
1209 if (ctxt->spaceNr <= 0) return(0);
1210 ctxt->spaceNr--;
1211 if (ctxt->spaceNr > 0)
1212 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1213 else
1214 ctxt->space = NULL;
1215 ret = ctxt->spaceTab[ctxt->spaceNr];
1216 ctxt->spaceTab[ctxt->spaceNr] = -1;
1217 return(ret);
1218}
1219
1220/*
1221 * Macros for accessing the content. Those should be used only by the parser,
1222 * and not exported.
1223 *
1224 * Dirty macros, i.e. one often need to make assumption on the context to
1225 * use them
1226 *
1227 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1228 * To be used with extreme caution since operations consuming
1229 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1234 * RAW same as CUR but in the input buffer, bypass any token
1235 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
1238 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001239 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser. It takes no argument.
Owen Taylor3473f882001-02-23 17:55:21 +00001242 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1243 *
1244 * NEXT Skip to the next character, this does the proper decoding
1245 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001246 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001247 * CUR_CHAR(l) returns the current unicode character (int), set l
1248 * to the number of xmlChars used for the encoding [0-5].
1249 * CUR_SCHAR same but operate on a string instead of the context
1250 * COPY_BUF copy the current unicode char to the target buffer, increment
1251 * the index
1252 * GROW, SHRINK handling of input buffers
1253 */
1254
/*
 * Raw accessors on the current input buffer. All of them expect a variable
 * named ctxt to be in scope at the point of use.
 *   RAW / CUR  the xmlChar under the cursor
 *   NXT(val)   the xmlChar val positions after the cursor
 *   CUR_PTR    the cursor itself
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
1259
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn. Bytes are read as unsigned char and compared left to right,
 * stopping at the first mismatch. s is evaluated once per byte compared.
 * Every argument is parenthesized so that expression arguments are
 * compared as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1277
/*
 * SKIP(val): advance the cursor, the column and the character counter by
 * val (evaluated three times), then expand a parameter entity reference if
 * the cursor now sits on '%', and pop the input if its end was reached and
 * it cannot be grown. Lines are not counted: use SKIPL across newlines.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1285
/*
 * SKIPL(val): skip val xmlChars one at a time while keeping line and
 * column up to date (a '\n' starts a new line at column 1), then expand a
 * parameter entity reference if the cursor now sits on '%', and pop the
 * input if its end was reached and it cannot be grown.
 * val is parenthesized and is evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1300
Daniel Veillarda880b122003-04-21 21:36:41 +00001301#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001302 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1303 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001304 xmlSHRINK (ctxt);
1305
1306static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1307 xmlParserInputShrink(ctxt->input);
1308 if ((*ctxt->input->cur == 0) &&
1309 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1310 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001311 }
Owen Taylor3473f882001-02-23 17:55:21 +00001312
Daniel Veillarda880b122003-04-21 21:36:41 +00001313#define GROW if ((ctxt->progressive == 0) && \
1314 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001315 xmlGROW (ctxt);
1316
1317static void xmlGROW (xmlParserCtxtPtr ctxt) {
1318 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1319 if ((*ctxt->input->cur == 0) &&
1320 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1321 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001322}
Owen Taylor3473f882001-02-23 17:55:21 +00001323
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and the
 * character counter, and ask for more input when the cursor lands on a
 * 0 byte. The line counter is not touched and no input is popped.
 * The macro expands to a braced block, not to a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1335
/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' under the cursor starts a new line at column 1, anything else
 * advances the column by one. A parameter entity reference is expanded if
 * the cursor then sits on '%'. ctxt->nbChars is not updated here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++; ctxt->input->col = 1;			\
    }									\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)
1343
/*
 * CUR_CHAR(l): xmlCurrentChar() on the current context; l must be an int
 * lvalue since its address is passed to receive the length.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)

/* CUR_SCHAR(s, l): same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. A character of length l == 1 is stored directly as one
 * xmlChar, any other goes through xmlCopyCharMultiByte().
 * The macro expands to a bare if/else without a final semicolon and its
 * arguments are not parenthesized: pass plain identifiers only.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001350
1351/**
1352 * xmlSkipBlankChars:
1353 * @ctxt: the XML parser context
1354 *
1355 * skip all blanks character found at that point in the input streams.
1356 * It pops up finished entities in the process if allowable at that point.
1357 *
1358 * Returns the number of space chars skipped
1359 */
1360
1361int
1362xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001363 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001364
1365 /*
1366 * It's Okay to use CUR/NEXT here since all the blanks are on
1367 * the ASCII range.
1368 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001369 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1370 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001371 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001372 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001373 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001374 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001375 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001376 if (*cur == '\n') {
1377 ctxt->input->line++; ctxt->input->col = 1;
1378 }
1379 cur++;
1380 res++;
1381 if (*cur == 0) {
1382 ctxt->input->cur = cur;
1383 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1384 cur = ctxt->input->cur;
1385 }
1386 }
1387 ctxt->input->cur = cur;
1388 } else {
1389 int cur;
1390 do {
1391 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001392 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001393 NEXT;
1394 cur = CUR;
1395 res++;
1396 }
1397 while ((cur == 0) && (ctxt->inputNr > 1) &&
1398 (ctxt->instate != XML_PARSER_COMMENT)) {
1399 xmlPopInput(ctxt);
1400 cur = CUR;
1401 }
1402 /*
1403 * Need to handle support of entities branching here
1404 */
1405 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1406 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1407 }
Owen Taylor3473f882001-02-23 17:55:21 +00001408 return(res);
1409}
1410
1411/************************************************************************
1412 * *
1413 * Commodity functions to handle entities *
1414 * *
1415 ************************************************************************/
1416
1417/**
1418 * xmlPopInput:
1419 * @ctxt: an XML parser context
1420 *
1421 * xmlPopInput: the current input pointed by ctxt->input came to an end
1422 * pop it and return the next char.
1423 *
1424 * Returns the current xmlChar in the parser context
1425 */
1426xmlChar
1427xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001428 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001429 if (xmlParserDebugEntities)
1430 xmlGenericError(xmlGenericErrorContext,
1431 "Popping input %d\n", ctxt->inputNr);
1432 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001433 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001434 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1435 return(xmlPopInput(ctxt));
1436 return(CUR);
1437}
1438
1439/**
1440 * xmlPushInput:
1441 * @ctxt: an XML parser context
1442 * @input: an XML parser input fragment (entity, XML fragment ...).
1443 *
1444 * xmlPushInput: switch to a new input stream which is stacked on top
1445 * of the previous one(s).
1446 */
1447void
1448xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1449 if (input == NULL) return;
1450
1451 if (xmlParserDebugEntities) {
1452 if ((ctxt->input != NULL) && (ctxt->input->filename))
1453 xmlGenericError(xmlGenericErrorContext,
1454 "%s(%d): ", ctxt->input->filename,
1455 ctxt->input->line);
1456 xmlGenericError(xmlGenericErrorContext,
1457 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1458 }
1459 inputPush(ctxt, input);
1460 GROW;
1461}
1462
1463/**
1464 * xmlParseCharRef:
1465 * @ctxt: an XML parser context
1466 *
1467 * parse Reference declarations
1468 *
1469 * [66] CharRef ::= '&#' [0-9]+ ';' |
1470 * '&#x' [0-9a-fA-F]+ ';'
1471 *
1472 * [ WFC: Legal Character ]
1473 * Characters referred to using character references must match the
1474 * production for Char.
1475 *
1476 * Returns the value parsed (as an int), 0 in case of error
1477 */
1478int
1479xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001480 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001481 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001482 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001483
Owen Taylor3473f882001-02-23 17:55:21 +00001484 /*
1485 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1486 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001487 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001488 (NXT(2) == 'x')) {
1489 SKIP(3);
1490 GROW;
1491 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001492 if (count++ > 20) {
1493 count = 0;
1494 GROW;
1495 }
1496 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001497 val = val * 16 + (CUR - '0');
1498 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1499 val = val * 16 + (CUR - 'a') + 10;
1500 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1501 val = val * 16 + (CUR - 'A') + 10;
1502 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001504 val = 0;
1505 break;
1506 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001507 if (val > 0x10FFFF)
1508 outofrange = val;
1509
Owen Taylor3473f882001-02-23 17:55:21 +00001510 NEXT;
1511 count++;
1512 }
1513 if (RAW == ';') {
1514 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001515 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001516 ctxt->nbChars ++;
1517 ctxt->input->cur++;
1518 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001519 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001520 SKIP(2);
1521 GROW;
1522 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001523 if (count++ > 20) {
1524 count = 0;
1525 GROW;
1526 }
1527 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001528 val = val * 10 + (CUR - '0');
1529 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001530 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 val = 0;
1532 break;
1533 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001534 if (val > 0x10FFFF)
1535 outofrange = val;
1536
Owen Taylor3473f882001-02-23 17:55:21 +00001537 NEXT;
1538 count++;
1539 }
1540 if (RAW == ';') {
1541 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001542 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001543 ctxt->nbChars ++;
1544 ctxt->input->cur++;
1545 }
1546 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001547 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001548 }
1549
1550 /*
1551 * [ WFC: Legal Character ]
1552 * Characters referred to using character references must match the
1553 * production for Char.
1554 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001555 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001556 return(val);
1557 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001558 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1559 "xmlParseCharRef: invalid xmlChar value %d\n",
1560 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001561 }
1562 return(0);
1563}
1564
1565/**
1566 * xmlParseStringCharRef:
1567 * @ctxt: an XML parser context
1568 * @str: a pointer to an index in the string
1569 *
1570 * parse Reference declarations, variant parsing from a string rather
1571 * than an an input flow.
1572 *
1573 * [66] CharRef ::= '&#' [0-9]+ ';' |
1574 * '&#x' [0-9a-fA-F]+ ';'
1575 *
1576 * [ WFC: Legal Character ]
1577 * Characters referred to using character references must match the
1578 * production for Char.
1579 *
1580 * Returns the value parsed (as an int), 0 in case of error, str will be
1581 * updated to the current value of the index
1582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001583static int
Owen Taylor3473f882001-02-23 17:55:21 +00001584xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1585 const xmlChar *ptr;
1586 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001587 unsigned int val = 0;
1588 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001589
1590 if ((str == NULL) || (*str == NULL)) return(0);
1591 ptr = *str;
1592 cur = *ptr;
1593 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1594 ptr += 3;
1595 cur = *ptr;
1596 while (cur != ';') { /* Non input consuming loop */
1597 if ((cur >= '0') && (cur <= '9'))
1598 val = val * 16 + (cur - '0');
1599 else if ((cur >= 'a') && (cur <= 'f'))
1600 val = val * 16 + (cur - 'a') + 10;
1601 else if ((cur >= 'A') && (cur <= 'F'))
1602 val = val * 16 + (cur - 'A') + 10;
1603 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001604 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001605 val = 0;
1606 break;
1607 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001608 if (val > 0x10FFFF)
1609 outofrange = val;
1610
Owen Taylor3473f882001-02-23 17:55:21 +00001611 ptr++;
1612 cur = *ptr;
1613 }
1614 if (cur == ';')
1615 ptr++;
1616 } else if ((cur == '&') && (ptr[1] == '#')){
1617 ptr += 2;
1618 cur = *ptr;
1619 while (cur != ';') { /* Non input consuming loops */
1620 if ((cur >= '0') && (cur <= '9'))
1621 val = val * 10 + (cur - '0');
1622 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001624 val = 0;
1625 break;
1626 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001627 if (val > 0x10FFFF)
1628 outofrange = val;
1629
Owen Taylor3473f882001-02-23 17:55:21 +00001630 ptr++;
1631 cur = *ptr;
1632 }
1633 if (cur == ';')
1634 ptr++;
1635 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001636 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001637 return(0);
1638 }
1639 *str = ptr;
1640
1641 /*
1642 * [ WFC: Legal Character ]
1643 * Characters referred to using character references must match the
1644 * production for Char.
1645 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001646 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001647 return(val);
1648 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001649 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1650 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1651 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 }
1653 return(0);
1654}
1655
1656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001657 * xmlNewBlanksWrapperInputStream:
1658 * @ctxt: an XML parser context
1659 * @entity: an Entity pointer
1660 *
1661 * Create a new input stream for wrapping
1662 * blanks around a PEReference
1663 *
1664 * Returns the new input stream or NULL
1665 */
1666
1667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1668
Daniel Veillardf4862f02002-09-10 11:13:43 +00001669static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1671 xmlParserInputPtr input;
1672 xmlChar *buffer;
1673 size_t length;
1674 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001675 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1676 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001677 return(NULL);
1678 }
1679 if (xmlParserDebugEntities)
1680 xmlGenericError(xmlGenericErrorContext,
1681 "new blanks wrapper for entity: %s\n", entity->name);
1682 input = xmlNewInputStream(ctxt);
1683 if (input == NULL) {
1684 return(NULL);
1685 }
1686 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001687 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001688 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001689 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001690 return(NULL);
1691 }
1692 buffer [0] = ' ';
1693 buffer [1] = '%';
1694 buffer [length-3] = ';';
1695 buffer [length-2] = ' ';
1696 buffer [length-1] = 0;
1697 memcpy(buffer + 2, entity->name, length - 5);
1698 input->free = deallocblankswrapper;
1699 input->base = buffer;
1700 input->cur = buffer;
1701 input->length = length;
1702 input->end = &buffer[length];
1703 return(input);
1704}
1705
1706/**
Owen Taylor3473f882001-02-23 17:55:21 +00001707 * xmlParserHandlePEReference:
1708 * @ctxt: the parser context
1709 *
1710 * [69] PEReference ::= '%' Name ';'
1711 *
1712 * [ WFC: No Recursion ]
1713 * A parsed entity must not contain a recursive
1714 * reference to itself, either directly or indirectly.
1715 *
1716 * [ WFC: Entity Declared ]
1717 * In a document without any DTD, a document with only an internal DTD
1718 * subset which contains no parameter entity references, or a document
1719 * with "standalone='yes'", ... ... The declaration of a parameter
1720 * entity must precede any reference to it...
1721 *
1722 * [ VC: Entity Declared ]
1723 * In a document with an external subset or external parameter entities
1724 * with "standalone='no'", ... ... The declaration of a parameter entity
1725 * must precede any reference to it...
1726 *
1727 * [ WFC: In DTD ]
1728 * Parameter-entity references may only appear in the DTD.
1729 * NOTE: misleading but this is handled.
1730 *
1731 * A PEReference may have been detected in the current input stream
1732 * the handling is done accordingly to
1733 * http://www.w3.org/TR/REC-xml#entproc
1734 * i.e.
1735 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001736 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001737 */
1738void
1739xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001741 xmlEntityPtr entity = NULL;
1742 xmlParserInputPtr input;
1743
Owen Taylor3473f882001-02-23 17:55:21 +00001744 if (RAW != '%') return;
1745 switch(ctxt->instate) {
1746 case XML_PARSER_CDATA_SECTION:
1747 return;
1748 case XML_PARSER_COMMENT:
1749 return;
1750 case XML_PARSER_START_TAG:
1751 return;
1752 case XML_PARSER_END_TAG:
1753 return;
1754 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001755 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001756 return;
1757 case XML_PARSER_PROLOG:
1758 case XML_PARSER_START:
1759 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001760 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return;
1762 case XML_PARSER_ENTITY_DECL:
1763 case XML_PARSER_CONTENT:
1764 case XML_PARSER_ATTRIBUTE_VALUE:
1765 case XML_PARSER_PI:
1766 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001767 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001768 /* we just ignore it there */
1769 return;
1770 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001771 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001772 return;
1773 case XML_PARSER_ENTITY_VALUE:
1774 /*
1775 * NOTE: in the case of entity values, we don't do the
1776 * substitution here since we need the literal
1777 * entity value to be able to save the internal
1778 * subset of the document.
1779 * This will be handled by xmlStringDecodeEntities
1780 */
1781 return;
1782 case XML_PARSER_DTD:
1783 /*
1784 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1785 * In the internal DTD subset, parameter-entity references
1786 * can occur only where markup declarations can occur, not
1787 * within markup declarations.
1788 * In that case this is handled in xmlParseMarkupDecl
1789 */
1790 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1791 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001792 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001793 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001794 break;
1795 case XML_PARSER_IGNORE:
1796 return;
1797 }
1798
1799 NEXT;
1800 name = xmlParseName(ctxt);
1801 if (xmlParserDebugEntities)
1802 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001803 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001804 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001805 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001806 } else {
1807 if (RAW == ';') {
1808 NEXT;
1809 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1810 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1811 if (entity == NULL) {
1812
1813 /*
1814 * [ WFC: Entity Declared ]
1815 * In a document without any DTD, a document with only an
1816 * internal DTD subset which contains no parameter entity
1817 * references, or a document with "standalone='yes'", ...
1818 * ... The declaration of a parameter entity must precede
1819 * any reference to it...
1820 */
1821 if ((ctxt->standalone == 1) ||
1822 ((ctxt->hasExternalSubset == 0) &&
1823 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001824 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001825 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001826 } else {
1827 /*
1828 * [ VC: Entity Declared ]
1829 * In a document with an external subset or external
1830 * parameter entities with "standalone='no'", ...
1831 * ... The declaration of a parameter entity must precede
1832 * any reference to it...
1833 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001834 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1835 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1836 "PEReference: %%%s; not found\n",
1837 name);
1838 } else
1839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1840 "PEReference: %%%s; not found\n",
1841 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 ctxt->valid = 0;
1843 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001844 } else if (ctxt->input->free != deallocblankswrapper) {
1845 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1846 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001847 } else {
1848 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1849 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001850 xmlChar start[4];
1851 xmlCharEncoding enc;
1852
Owen Taylor3473f882001-02-23 17:55:21 +00001853 /*
1854 * handle the extra spaces added before and after
1855 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001856 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001857 */
1858 input = xmlNewEntityInputStream(ctxt, entity);
1859 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001860
1861 /*
1862 * Get the 4 first bytes and decode the charset
1863 * if enc != XML_CHAR_ENCODING_NONE
1864 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001865 * Note that, since we may have some non-UTF8
1866 * encoding (like UTF16, bug 135229), the 'length'
1867 * is not known, but we can calculate based upon
1868 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001869 */
1870 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001871 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001872 start[0] = RAW;
1873 start[1] = NXT(1);
1874 start[2] = NXT(2);
1875 start[3] = NXT(3);
1876 enc = xmlDetectCharEncoding(start, 4);
1877 if (enc != XML_CHAR_ENCODING_NONE) {
1878 xmlSwitchEncoding(ctxt, enc);
1879 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001880 }
1881
Owen Taylor3473f882001-02-23 17:55:21 +00001882 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001883 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1884 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001885 xmlParseTextDecl(ctxt);
1886 }
Owen Taylor3473f882001-02-23 17:55:21 +00001887 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001888 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1889 "PEReference: %s is not a parameter entity\n",
1890 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001891 }
1892 }
1893 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
Owen Taylor3473f882001-02-23 17:55:21 +00001896 }
1897}
1898
/*
 * growBuffer:
 * @buffer:  name of an xmlChar * variable holding a heap block
 *
 * Macro used to grow the current buffer: it doubles the companion
 * variable <buffer>_size and reallocates @buffer to that size.
 * The calling function must provide a "mem_error" label, which is
 * jumped to when the reallocation fails; in that case @buffer still
 * points to the old block while <buffer>_size is already doubled.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
1910
1911/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001912 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001913 * @ctxt: the parser context
1914 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001916 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1917 * @end: an end marker xmlChar, 0 if none
1918 * @end2: an end marker xmlChar, 0 if none
1919 * @end3: an end marker xmlChar, 0 if none
1920 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001921 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001922 *
1923 * [67] Reference ::= EntityRef | CharRef
1924 *
1925 * [69] PEReference ::= '%' Name ';'
1926 *
1927 * Returns A newly allocated string with the substitution done. The caller
1928 * must deallocate it !
1929 */
1930xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1932 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001933 xmlChar *buffer = NULL;
1934 int buffer_size = 0;
1935
1936 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001937 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001938 xmlEntityPtr ent;
1939 int c,l;
1940 int nbchars = 0;
1941
Daniel Veillarda82b1822004-11-08 16:24:57 +00001942 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001943 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001944 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001945
1946 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001947 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001948 return(NULL);
1949 }
1950
1951 /*
1952 * allocate a translation buffer.
1953 */
1954 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001955 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001956 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001957
1958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001959 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001960 * we are operating on already parsed values.
1961 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001962 if (str < last)
1963 c = CUR_SCHAR(str, l);
1964 else
1965 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001966 while ((c != 0) && (c != end) && /* non input consuming loop */
1967 (c != end2) && (c != end3)) {
1968
1969 if (c == 0) break;
1970 if ((c == '&') && (str[1] == '#')) {
1971 int val = xmlParseStringCharRef(ctxt, &str);
1972 if (val != 0) {
1973 COPY_BUF(0,buffer,nbchars,val);
1974 }
1975 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1976 if (xmlParserDebugEntities)
1977 xmlGenericError(xmlGenericErrorContext,
1978 "String decoding Entity Reference: %.30s\n",
1979 str);
1980 ent = xmlParseStringEntityRef(ctxt, &str);
1981 if ((ent != NULL) &&
1982 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1983 if (ent->content != NULL) {
1984 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1985 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001986 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1987 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001988 }
1989 } else if ((ent != NULL) && (ent->content != NULL)) {
1990 xmlChar *rep;
1991
1992 ctxt->depth++;
1993 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1994 0, 0, 0);
1995 ctxt->depth--;
1996 if (rep != NULL) {
1997 current = rep;
1998 while (*current != 0) { /* non input consuming loop */
1999 buffer[nbchars++] = *current++;
2000 if (nbchars >
2001 buffer_size - XML_PARSER_BUFFER_SIZE) {
2002 growBuffer(buffer);
2003 }
2004 }
2005 xmlFree(rep);
2006 }
2007 } else if (ent != NULL) {
2008 int i = xmlStrlen(ent->name);
2009 const xmlChar *cur = ent->name;
2010
2011 buffer[nbchars++] = '&';
2012 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2013 growBuffer(buffer);
2014 }
2015 for (;i > 0;i--)
2016 buffer[nbchars++] = *cur++;
2017 buffer[nbchars++] = ';';
2018 }
2019 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2020 if (xmlParserDebugEntities)
2021 xmlGenericError(xmlGenericErrorContext,
2022 "String decoding PE Reference: %.30s\n", str);
2023 ent = xmlParseStringPEReference(ctxt, &str);
2024 if (ent != NULL) {
2025 xmlChar *rep;
2026
2027 ctxt->depth++;
2028 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2029 0, 0, 0);
2030 ctxt->depth--;
2031 if (rep != NULL) {
2032 current = rep;
2033 while (*current != 0) { /* non input consuming loop */
2034 buffer[nbchars++] = *current++;
2035 if (nbchars >
2036 buffer_size - XML_PARSER_BUFFER_SIZE) {
2037 growBuffer(buffer);
2038 }
2039 }
2040 xmlFree(rep);
2041 }
2042 }
2043 } else {
2044 COPY_BUF(l,buffer,nbchars,c);
2045 str += l;
2046 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2047 growBuffer(buffer);
2048 }
2049 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002050 if (str < last)
2051 c = CUR_SCHAR(str, l);
2052 else
2053 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002054 }
2055 buffer[nbchars++] = 0;
2056 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002057
2058mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002059 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002060 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002061}
2062
Daniel Veillarde57ec792003-09-10 10:50:59 +00002063/**
2064 * xmlStringDecodeEntities:
2065 * @ctxt: the parser context
2066 * @str: the input string
2067 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2068 * @end: an end marker xmlChar, 0 if none
2069 * @end2: an end marker xmlChar, 0 if none
2070 * @end3: an end marker xmlChar, 0 if none
2071 *
2072 * Takes a entity string content and process to do the adequate substitutions.
2073 *
2074 * [67] Reference ::= EntityRef | CharRef
2075 *
2076 * [69] PEReference ::= '%' Name ';'
2077 *
2078 * Returns A newly allocated string with the substitution done. The caller
2079 * must deallocate it !
2080 */
2081xmlChar *
2082xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2083 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002084 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002085 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2086 end, end2, end3));
2087}
Owen Taylor3473f882001-02-23 17:55:21 +00002088
2089/************************************************************************
2090 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002091 * Commodity functions, cleanup needed ? *
2092 * *
2093 ************************************************************************/
2094
2095/**
2096 * areBlanks:
2097 * @ctxt: an XML parser context
2098 * @str: a xmlChar *
2099 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002100 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002101 *
2102 * Is this a sequence of blank chars that one can ignore ?
2103 *
2104 * Returns 1 if ignorable 0 otherwise.
2105 */
2106
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002107static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2108 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002109 int i, ret;
2110 xmlNodePtr lastChild;
2111
Daniel Veillard05c13a22001-09-09 08:38:09 +00002112 /*
2113 * Don't spend time trying to differentiate them, the same callback is
2114 * used !
2115 */
2116 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002117 return(0);
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 /*
2120 * Check for xml:space value.
2121 */
2122 if (*(ctxt->space) == 1)
2123 return(0);
2124
2125 /*
2126 * Check that the string is made of blanks
2127 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002128 if (blank_chars == 0) {
2129 for (i = 0;i < len;i++)
2130 if (!(IS_BLANK_CH(str[i]))) return(0);
2131 }
Owen Taylor3473f882001-02-23 17:55:21 +00002132
2133 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002134 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002135 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002136 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002137 if (ctxt->myDoc != NULL) {
2138 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2139 if (ret == 0) return(1);
2140 if (ret == 1) return(0);
2141 }
2142
2143 /*
2144 * Otherwise, heuristic :-\
2145 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002146 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002147 if ((ctxt->node->children == NULL) &&
2148 (RAW == '<') && (NXT(1) == '/')) return(0);
2149
2150 lastChild = xmlGetLastChild(ctxt->node);
2151 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002152 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2153 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002154 } else if (xmlNodeIsText(lastChild))
2155 return(0);
2156 else if ((ctxt->node->children != NULL) &&
2157 (xmlNodeIsText(ctxt->node->children)))
2158 return(0);
2159 return(1);
2160}
2161
Owen Taylor3473f882001-02-23 17:55:21 +00002162/************************************************************************
2163 * *
2164 * Extra stuff for namespace support *
2165 * Relates to http://www.w3.org/TR/WD-xml-names *
2166 * *
2167 ************************************************************************/
2168
2169/**
2170 * xmlSplitQName:
2171 * @ctxt: an XML parser context
2172 * @name: an XML parser context
2173 * @prefix: a xmlChar **
2174 *
2175 * parse an UTF8 encoded XML qualified name string
2176 *
2177 * [NS 5] QName ::= (Prefix ':')? LocalPart
2178 *
2179 * [NS 6] Prefix ::= NCName
2180 *
2181 * [NS 7] LocalPart ::= NCName
2182 *
2183 * Returns the local part, and prefix is updated
2184 * to get the Prefix if any.
2185 */
2186
2187xmlChar *
2188xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2189 xmlChar buf[XML_MAX_NAMELEN + 5];
2190 xmlChar *buffer = NULL;
2191 int len = 0;
2192 int max = XML_MAX_NAMELEN;
2193 xmlChar *ret = NULL;
2194 const xmlChar *cur = name;
2195 int c;
2196
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002197 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002198 *prefix = NULL;
2199
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002200 if (cur == NULL) return(NULL);
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202#ifndef XML_XML_NAMESPACE
2203 /* xml: prefix is not really a namespace */
2204 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2205 (cur[2] == 'l') && (cur[3] == ':'))
2206 return(xmlStrdup(name));
2207#endif
2208
Daniel Veillard597bc482003-07-24 16:08:28 +00002209 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002210 if (cur[0] == ':')
2211 return(xmlStrdup(name));
2212
2213 c = *cur++;
2214 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2215 buf[len++] = c;
2216 c = *cur++;
2217 }
2218 if (len >= max) {
2219 /*
2220 * Okay someone managed to make a huge name, so he's ready to pay
2221 * for the processing speed.
2222 */
2223 max = len * 2;
2224
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002225 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002226 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002227 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002228 return(NULL);
2229 }
2230 memcpy(buffer, buf, len);
2231 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2232 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002233 xmlChar *tmp;
2234
Owen Taylor3473f882001-02-23 17:55:21 +00002235 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002236 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002237 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002238 if (tmp == NULL) {
2239 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002241 return(NULL);
2242 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002243 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002244 }
2245 buffer[len++] = c;
2246 c = *cur++;
2247 }
2248 buffer[len] = 0;
2249 }
2250
Daniel Veillard597bc482003-07-24 16:08:28 +00002251 /* nasty but well=formed
2252 if ((c == ':') && (*cur == 0)) {
2253 return(xmlStrdup(name));
2254 } */
2255
Owen Taylor3473f882001-02-23 17:55:21 +00002256 if (buffer == NULL)
2257 ret = xmlStrndup(buf, len);
2258 else {
2259 ret = buffer;
2260 buffer = NULL;
2261 max = XML_MAX_NAMELEN;
2262 }
2263
2264
2265 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002266 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002268 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002269 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 len = 0;
2272
Daniel Veillardbb284f42002-10-16 18:02:47 +00002273 /*
2274 * Check that the first character is proper to start
2275 * a new name
2276 */
2277 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2278 ((c >= 0x41) && (c <= 0x5A)) ||
2279 (c == '_') || (c == ':'))) {
2280 int l;
2281 int first = CUR_SCHAR(cur, l);
2282
2283 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002284 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002285 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002286 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002287 }
2288 }
2289 cur++;
2290
Owen Taylor3473f882001-02-23 17:55:21 +00002291 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2292 buf[len++] = c;
2293 c = *cur++;
2294 }
2295 if (len >= max) {
2296 /*
2297 * Okay someone managed to make a huge name, so he's ready to pay
2298 * for the processing speed.
2299 */
2300 max = len * 2;
2301
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002302 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002303 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002304 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002305 return(NULL);
2306 }
2307 memcpy(buffer, buf, len);
2308 while (c != 0) { /* tested bigname2.xml */
2309 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002310 xmlChar *tmp;
2311
Owen Taylor3473f882001-02-23 17:55:21 +00002312 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002313 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002314 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002315 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002316 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002317 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002318 return(NULL);
2319 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002320 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002321 }
2322 buffer[len++] = c;
2323 c = *cur++;
2324 }
2325 buffer[len] = 0;
2326 }
2327
2328 if (buffer == NULL)
2329 ret = xmlStrndup(buf, len);
2330 else {
2331 ret = buffer;
2332 }
2333 }
2334
2335 return(ret);
2336}
2337
2338/************************************************************************
2339 * *
2340 * The parser itself *
2341 * Relates to http://www.w3.org/TR/REC-xml *
2342 * *
2343 ************************************************************************/
2344
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002345static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002346static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002347 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002348
Owen Taylor3473f882001-02-23 17:55:21 +00002349/**
2350 * xmlParseName:
2351 * @ctxt: an XML parser context
2352 *
2353 * parse an XML name.
2354 *
2355 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2356 * CombiningChar | Extender
2357 *
2358 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2359 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002360 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002361 *
2362 * Returns the Name parsed or NULL
2363 */
2364
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002365const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002366xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002367 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002368 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002369 int count = 0;
2370
2371 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002372
2373 /*
2374 * Accelerator for simple ASCII names
2375 */
2376 in = ctxt->input->cur;
2377 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2378 ((*in >= 0x41) && (*in <= 0x5A)) ||
2379 (*in == '_') || (*in == ':')) {
2380 in++;
2381 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2382 ((*in >= 0x41) && (*in <= 0x5A)) ||
2383 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002384 (*in == '_') || (*in == '-') ||
2385 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002386 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002387 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002388 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002390 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002391 ctxt->nbChars += count;
2392 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002393 if (ret == NULL)
2394 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002395 return(ret);
2396 }
2397 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002398 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002399}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002400
Daniel Veillard46de64e2002-05-29 08:21:33 +00002401/**
2402 * xmlParseNameAndCompare:
2403 * @ctxt: an XML parser context
2404 *
2405 * parse an XML name and compares for match
2406 * (specialized for endtag parsing)
2407 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002408 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2409 * and the name for mismatch
2410 */
2411
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002412static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002413xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002414 register const xmlChar *cmp = other;
2415 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002416 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002417
2418 GROW;
2419
2420 in = ctxt->input->cur;
2421 while (*in != 0 && *in == *cmp) {
2422 ++in;
2423 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002424 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002425 }
William M. Brack76e95df2003-10-18 16:20:14 +00002426 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002427 /* success */
2428 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002429 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002430 }
2431 /* failure (or end of input buffer), check with full function */
2432 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002433 /* strings coming from the dictionnary direct compare possible */
2434 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002435 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002436 }
2437 return ret;
2438}
2439
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002440static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002442 int len = 0, l;
2443 int c;
2444 int count = 0;
2445
2446 /*
2447 * Handler for more complex cases
2448 */
2449 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002450 c = CUR_CHAR(l);
2451 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2452 (!IS_LETTER(c) && (c != '_') &&
2453 (c != ':'))) {
2454 return(NULL);
2455 }
2456
2457 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002458 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002459 (c == '.') || (c == '-') ||
2460 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002461 (IS_COMBINING(c)) ||
2462 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002463 if (count++ > 100) {
2464 count = 0;
2465 GROW;
2466 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002467 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002468 NEXTL(l);
2469 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002470 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002471 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002472}
2473
2474/**
2475 * xmlParseStringName:
2476 * @ctxt: an XML parser context
2477 * @str: a pointer to the string pointer (IN/OUT)
2478 *
2479 * parse an XML name.
2480 *
2481 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2482 * CombiningChar | Extender
2483 *
2484 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2485 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002486 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002487 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002488 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002489 * is updated to the current location in the string.
2490 */
2491
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002492static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002493xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2494 xmlChar buf[XML_MAX_NAMELEN + 5];
2495 const xmlChar *cur = *str;
2496 int len = 0, l;
2497 int c;
2498
2499 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002500 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002501 (c != ':')) {
2502 return(NULL);
2503 }
2504
William M. Brack871611b2003-10-18 04:53:14 +00002505 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002506 (c == '.') || (c == '-') ||
2507 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002508 (IS_COMBINING(c)) ||
2509 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002510 COPY_BUF(l,buf,len,c);
2511 cur += l;
2512 c = CUR_SCHAR(cur, l);
2513 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2514 /*
2515 * Okay someone managed to make a huge name, so he's ready to pay
2516 * for the processing speed.
2517 */
2518 xmlChar *buffer;
2519 int max = len * 2;
2520
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002521 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002522 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002523 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002524 return(NULL);
2525 }
2526 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002527 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002528 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002529 (c == '.') || (c == '-') ||
2530 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002531 (IS_COMBINING(c)) ||
2532 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002533 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002534 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002536 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002537 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002538 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002539 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002540 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002541 return(NULL);
2542 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002543 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002544 }
2545 COPY_BUF(l,buffer,len,c);
2546 cur += l;
2547 c = CUR_SCHAR(cur, l);
2548 }
2549 buffer[len] = 0;
2550 *str = cur;
2551 return(buffer);
2552 }
2553 }
2554 *str = cur;
2555 return(xmlStrndup(buf, len));
2556}
2557
2558/**
2559 * xmlParseNmtoken:
2560 * @ctxt: an XML parser context
2561 *
2562 * parse an XML Nmtoken.
2563 *
2564 * [7] Nmtoken ::= (NameChar)+
2565 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002566 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002567 *
2568 * Returns the Nmtoken parsed or NULL
2569 */
2570
2571xmlChar *
2572xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2573 xmlChar buf[XML_MAX_NAMELEN + 5];
2574 int len = 0, l;
2575 int c;
2576 int count = 0;
2577
2578 GROW;
2579 c = CUR_CHAR(l);
2580
William M. Brack871611b2003-10-18 04:53:14 +00002581 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002582 (c == '.') || (c == '-') ||
2583 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002584 (IS_COMBINING(c)) ||
2585 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002586 if (count++ > 100) {
2587 count = 0;
2588 GROW;
2589 }
2590 COPY_BUF(l,buf,len,c);
2591 NEXTL(l);
2592 c = CUR_CHAR(l);
2593 if (len >= XML_MAX_NAMELEN) {
2594 /*
2595 * Okay someone managed to make a huge token, so he's ready to pay
2596 * for the processing speed.
2597 */
2598 xmlChar *buffer;
2599 int max = len * 2;
2600
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002601 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002602 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002603 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002604 return(NULL);
2605 }
2606 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002607 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002608 (c == '.') || (c == '-') ||
2609 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002610 (IS_COMBINING(c)) ||
2611 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002612 if (count++ > 100) {
2613 count = 0;
2614 GROW;
2615 }
2616 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002617 xmlChar *tmp;
2618
Owen Taylor3473f882001-02-23 17:55:21 +00002619 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002620 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002621 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002622 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002624 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002625 return(NULL);
2626 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002627 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 }
2629 COPY_BUF(l,buffer,len,c);
2630 NEXTL(l);
2631 c = CUR_CHAR(l);
2632 }
2633 buffer[len] = 0;
2634 return(buffer);
2635 }
2636 }
2637 if (len == 0)
2638 return(NULL);
2639 return(xmlStrndup(buf, len));
2640}
2641
2642/**
2643 * xmlParseEntityValue:
2644 * @ctxt: an XML parser context
2645 * @orig: if non-NULL store a copy of the original entity value
2646 *
2647 * parse a value for ENTITY declarations
2648 *
2649 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2650 * "'" ([^%&'] | PEReference | Reference)* "'"
2651 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002652 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002653 */
2654
2655xmlChar *
2656xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2657 xmlChar *buf = NULL;
2658 int len = 0;
2659 int size = XML_PARSER_BUFFER_SIZE;
2660 int c, l;
2661 xmlChar stop;
2662 xmlChar *ret = NULL;
2663 const xmlChar *cur = NULL;
2664 xmlParserInputPtr input;
2665
2666 if (RAW == '"') stop = '"';
2667 else if (RAW == '\'') stop = '\'';
2668 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002669 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002670 return(NULL);
2671 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002672 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002673 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002674 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002675 return(NULL);
2676 }
2677
2678 /*
2679 * The content of the entity definition is copied in a buffer.
2680 */
2681
2682 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2683 input = ctxt->input;
2684 GROW;
2685 NEXT;
2686 c = CUR_CHAR(l);
2687 /*
2688 * NOTE: 4.4.5 Included in Literal
2689 * When a parameter entity reference appears in a literal entity
2690 * value, ... a single or double quote character in the replacement
2691 * text is always treated as a normal data character and will not
2692 * terminate the literal.
2693 * In practice it means we stop the loop only when back at parsing
2694 * the initial entity and the quote is found
2695 */
William M. Brack871611b2003-10-18 04:53:14 +00002696 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002697 (ctxt->input != input))) {
2698 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002699 xmlChar *tmp;
2700
Owen Taylor3473f882001-02-23 17:55:21 +00002701 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002702 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2703 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002704 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002705 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002706 return(NULL);
2707 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002708 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002709 }
2710 COPY_BUF(l,buf,len,c);
2711 NEXTL(l);
2712 /*
2713 * Pop-up of finished entities.
2714 */
2715 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2716 xmlPopInput(ctxt);
2717
2718 GROW;
2719 c = CUR_CHAR(l);
2720 if (c == 0) {
2721 GROW;
2722 c = CUR_CHAR(l);
2723 }
2724 }
2725 buf[len] = 0;
2726
2727 /*
2728 * Raise problem w.r.t. '&' and '%' being used in non-entities
2729 * reference constructs. Note Charref will be handled in
2730 * xmlStringDecodeEntities()
2731 */
2732 cur = buf;
2733 while (*cur != 0) { /* non input consuming */
2734 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2735 xmlChar *name;
2736 xmlChar tmp = *cur;
2737
2738 cur++;
2739 name = xmlParseStringName(ctxt, &cur);
2740 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002741 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002742 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002743 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002744 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002745 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2746 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002747 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002748 }
2749 if (name != NULL)
2750 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002751 if (*cur == 0)
2752 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002753 }
2754 cur++;
2755 }
2756
2757 /*
2758 * Then PEReference entities are substituted.
2759 */
2760 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002761 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002762 xmlFree(buf);
2763 } else {
2764 NEXT;
2765 /*
2766 * NOTE: 4.4.7 Bypassed
2767 * When a general entity reference appears in the EntityValue in
2768 * an entity declaration, it is bypassed and left as is.
2769 * so XML_SUBSTITUTE_REF is not set here.
2770 */
2771 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2772 0, 0, 0);
2773 if (orig != NULL)
2774 *orig = buf;
2775 else
2776 xmlFree(buf);
2777 }
2778
2779 return(ret);
2780}
2781
2782/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002783 * xmlParseAttValueComplex:
2784 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002785 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002786 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002787 *
2788 * parse a value for an attribute, this is the fallback function
2789 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002790 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002791 *
2792 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2793 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002794static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002795xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002796 xmlChar limit = 0;
2797 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002798 int len = 0;
2799 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002800 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002801 xmlChar *current = NULL;
2802 xmlEntityPtr ent;
2803
Owen Taylor3473f882001-02-23 17:55:21 +00002804 if (NXT(0) == '"') {
2805 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2806 limit = '"';
2807 NEXT;
2808 } else if (NXT(0) == '\'') {
2809 limit = '\'';
2810 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2811 NEXT;
2812 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002813 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002814 return(NULL);
2815 }
2816
2817 /*
2818 * allocate a translation buffer.
2819 */
2820 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002821 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002822 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002823
2824 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002825 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002826 */
2827 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002828 while ((NXT(0) != limit) && /* checked */
2829 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002830 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002831 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002832 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002833 if (NXT(1) == '#') {
2834 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002835
Owen Taylor3473f882001-02-23 17:55:21 +00002836 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002837 if (ctxt->replaceEntities) {
2838 if (len > buf_size - 10) {
2839 growBuffer(buf);
2840 }
2841 buf[len++] = '&';
2842 } else {
2843 /*
2844 * The reparsing will be done in xmlStringGetNodeList()
2845 * called by the attribute() function in SAX.c
2846 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002847 if (len > buf_size - 10) {
2848 growBuffer(buf);
2849 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002850 buf[len++] = '&';
2851 buf[len++] = '#';
2852 buf[len++] = '3';
2853 buf[len++] = '8';
2854 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002855 }
2856 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002857 if (len > buf_size - 10) {
2858 growBuffer(buf);
2859 }
Owen Taylor3473f882001-02-23 17:55:21 +00002860 len += xmlCopyChar(0, &buf[len], val);
2861 }
2862 } else {
2863 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002864 if ((ent != NULL) &&
2865 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2866 if (len > buf_size - 10) {
2867 growBuffer(buf);
2868 }
2869 if ((ctxt->replaceEntities == 0) &&
2870 (ent->content[0] == '&')) {
2871 buf[len++] = '&';
2872 buf[len++] = '#';
2873 buf[len++] = '3';
2874 buf[len++] = '8';
2875 buf[len++] = ';';
2876 } else {
2877 buf[len++] = ent->content[0];
2878 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002879 } else if ((ent != NULL) &&
2880 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002881 xmlChar *rep;
2882
2883 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2884 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002885 XML_SUBSTITUTE_REF,
2886 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002887 if (rep != NULL) {
2888 current = rep;
2889 while (*current != 0) { /* non input consuming */
2890 buf[len++] = *current++;
2891 if (len > buf_size - 10) {
2892 growBuffer(buf);
2893 }
2894 }
2895 xmlFree(rep);
2896 }
2897 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002898 if (len > buf_size - 10) {
2899 growBuffer(buf);
2900 }
Owen Taylor3473f882001-02-23 17:55:21 +00002901 if (ent->content != NULL)
2902 buf[len++] = ent->content[0];
2903 }
2904 } else if (ent != NULL) {
2905 int i = xmlStrlen(ent->name);
2906 const xmlChar *cur = ent->name;
2907
2908 /*
2909 * This may look absurd but is needed to detect
2910 * entities problems
2911 */
2912 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2913 (ent->content != NULL)) {
2914 xmlChar *rep;
2915 rep = xmlStringDecodeEntities(ctxt, ent->content,
2916 XML_SUBSTITUTE_REF, 0, 0, 0);
2917 if (rep != NULL)
2918 xmlFree(rep);
2919 }
2920
2921 /*
2922 * Just output the reference
2923 */
2924 buf[len++] = '&';
2925 if (len > buf_size - i - 10) {
2926 growBuffer(buf);
2927 }
2928 for (;i > 0;i--)
2929 buf[len++] = *cur++;
2930 buf[len++] = ';';
2931 }
2932 }
2933 } else {
2934 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002935 if ((len != 0) || (!normalize)) {
2936 if ((!normalize) || (!in_space)) {
2937 COPY_BUF(l,buf,len,0x20);
2938 if (len > buf_size - 10) {
2939 growBuffer(buf);
2940 }
2941 }
2942 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002943 }
2944 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002945 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002946 COPY_BUF(l,buf,len,c);
2947 if (len > buf_size - 10) {
2948 growBuffer(buf);
2949 }
2950 }
2951 NEXTL(l);
2952 }
2953 GROW;
2954 c = CUR_CHAR(l);
2955 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002956 if ((in_space) && (normalize)) {
2957 while (buf[len - 1] == 0x20) len--;
2958 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002959 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002960 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002961 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002962 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002963 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2964 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002965 } else
2966 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002967 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002968 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002969
2970mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002971 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002972 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002973}
2974
2975/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002976 * xmlParseAttValue:
2977 * @ctxt: an XML parser context
2978 *
2979 * parse a value for an attribute
2980 * Note: the parser won't do substitution of entities here, this
2981 * will be handled later in xmlStringGetNodeList
2982 *
2983 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2984 * "'" ([^<&'] | Reference)* "'"
2985 *
2986 * 3.3.3 Attribute-Value Normalization:
2987 * Before the value of an attribute is passed to the application or
2988 * checked for validity, the XML processor must normalize it as follows:
2989 * - a character reference is processed by appending the referenced
2990 * character to the attribute value
2991 * - an entity reference is processed by recursively processing the
2992 * replacement text of the entity
2993 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2994 * appending #x20 to the normalized value, except that only a single
2995 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2996 * parsed entity or the literal entity value of an internal parsed entity
2997 * - other characters are processed by appending them to the normalized value
2998 * If the declared value is not CDATA, then the XML processor must further
2999 * process the normalized attribute value by discarding any leading and
3000 * trailing space (#x20) characters, and by replacing sequences of space
3001 * (#x20) characters by a single space (#x20) character.
3002 * All attributes for which no declaration has been read should be treated
3003 * by a non-validating parser as if declared CDATA.
3004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
3007
3008
3009xmlChar *
3010xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003011 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013}
3014
3015/**
Owen Taylor3473f882001-02-23 17:55:21 +00003016 * xmlParseSystemLiteral:
3017 * @ctxt: an XML parser context
3018 *
3019 * parse an XML Literal
3020 *
3021 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3022 *
3023 * Returns the SystemLiteral parsed or NULL
3024 */
3025
3026xmlChar *
3027xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3028 xmlChar *buf = NULL;
3029 int len = 0;
3030 int size = XML_PARSER_BUFFER_SIZE;
3031 int cur, l;
3032 xmlChar stop;
3033 int state = ctxt->instate;
3034 int count = 0;
3035
3036 SHRINK;
3037 if (RAW == '"') {
3038 NEXT;
3039 stop = '"';
3040 } else if (RAW == '\'') {
3041 NEXT;
3042 stop = '\'';
3043 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003045 return(NULL);
3046 }
3047
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003048 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003050 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003051 return(NULL);
3052 }
3053 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3054 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003055 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003056 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003057 xmlChar *tmp;
3058
Owen Taylor3473f882001-02-23 17:55:21 +00003059 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003060 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3061 if (tmp == NULL) {
3062 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003063 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003064 ctxt->instate = (xmlParserInputState) state;
3065 return(NULL);
3066 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003067 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 count++;
3070 if (count > 50) {
3071 GROW;
3072 count = 0;
3073 }
3074 COPY_BUF(l,buf,len,cur);
3075 NEXTL(l);
3076 cur = CUR_CHAR(l);
3077 if (cur == 0) {
3078 GROW;
3079 SHRINK;
3080 cur = CUR_CHAR(l);
3081 }
3082 }
3083 buf[len] = 0;
3084 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003085 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003086 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003087 } else {
3088 NEXT;
3089 }
3090 return(buf);
3091}
3092
3093/**
3094 * xmlParsePubidLiteral:
3095 * @ctxt: an XML parser context
3096 *
3097 * parse an XML public literal
3098 *
3099 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3100 *
3101 * Returns the PubidLiteral parsed or NULL.
3102 */
3103
3104xmlChar *
3105xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3106 xmlChar *buf = NULL;
3107 int len = 0;
3108 int size = XML_PARSER_BUFFER_SIZE;
3109 xmlChar cur;
3110 xmlChar stop;
3111 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003112 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003113
3114 SHRINK;
3115 if (RAW == '"') {
3116 NEXT;
3117 stop = '"';
3118 } else if (RAW == '\'') {
3119 NEXT;
3120 stop = '\'';
3121 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003122 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003123 return(NULL);
3124 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003125 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003126 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003127 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003128 return(NULL);
3129 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003130 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003131 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003132 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003134 xmlChar *tmp;
3135
Owen Taylor3473f882001-02-23 17:55:21 +00003136 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003137 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003139 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003140 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003141 return(NULL);
3142 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003143 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003144 }
3145 buf[len++] = cur;
3146 count++;
3147 if (count > 50) {
3148 GROW;
3149 count = 0;
3150 }
3151 NEXT;
3152 cur = CUR;
3153 if (cur == 0) {
3154 GROW;
3155 SHRINK;
3156 cur = CUR;
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003161 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003162 } else {
3163 NEXT;
3164 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003165 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003166 return(buf);
3167}
3168
Daniel Veillard48b2f892001-02-25 16:11:03 +00003169void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003170/**
3171 * xmlParseCharData:
3172 * @ctxt: an XML parser context
3173 * @cdata: int indicating whether we are within a CDATA section
3174 *
3175 * parse a CharData section.
3176 * if we are within a CDATA section ']]>' marks an end of section.
3177 *
3178 * The right angle bracket (>) may be represented using the string "&gt;",
3179 * and must, for compatibility, be escaped using "&gt;" or a character
3180 * reference when it appears in the string "]]>" in content, when that
3181 * string is not marking the end of a CDATA section.
3182 *
3183 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3184 */
3185
3186void
3187xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003188 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003189 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003190 int line = ctxt->input->line;
3191 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003192 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003193
3194 SHRINK;
3195 GROW;
3196 /*
3197 * Accelerated common case where input don't need to be
3198 * modified before passing it to the handler.
3199 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003200 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003201 in = ctxt->input->cur;
3202 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003203get_more_space:
3204 while (*in == 0x20) in++;
3205 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003206 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003207 in++;
3208 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003209 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003210 in++;
3211 }
3212 goto get_more_space;
3213 }
3214 if (*in == '<') {
3215 nbchar = in - ctxt->input->cur;
3216 if (nbchar > 0) {
3217 const xmlChar *tmp = ctxt->input->cur;
3218 ctxt->input->cur = in;
3219
Daniel Veillard34099b42004-11-04 17:34:35 +00003220 if ((ctxt->sax != NULL) &&
3221 (ctxt->sax->ignorableWhitespace !=
3222 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003223 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3224 ctxt->sax->ignorableWhitespace(ctxt->userData,
3225 tmp, nbchar);
3226 } else if (ctxt->sax->characters != NULL)
3227 ctxt->sax->characters(ctxt->userData,
3228 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003229 } else if ((ctxt->sax != NULL) &&
3230 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003231 ctxt->sax->characters(ctxt->userData,
3232 tmp, nbchar);
3233 }
3234 }
3235 return;
3236 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003237
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003238get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003239 ccol = ctxt->input->col;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003240 while (((*in > ']') && (*in <= 0x7F)) ||
3241 ((*in > '&') && (*in < '<')) ||
3242 ((*in > '<') && (*in < ']')) ||
3243 ((*in >= 0x20) && (*in < '&')) ||
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003244 (*in == 0x09)) {
3245 in++;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003246 ccol++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003247 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003248 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003249 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003250 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003251 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003252 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003253 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003254 in++;
3255 }
3256 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003257 }
3258 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003259 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003260 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003261 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003262 return;
3263 }
3264 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003265 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003266 goto get_more;
3267 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003268 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003269 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003270 if ((ctxt->sax != NULL) &&
3271 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003272 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003273 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003274 const xmlChar *tmp = ctxt->input->cur;
3275 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003276
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003277 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003278 ctxt->sax->ignorableWhitespace(ctxt->userData,
3279 tmp, nbchar);
3280 } else if (ctxt->sax->characters != NULL)
3281 ctxt->sax->characters(ctxt->userData,
3282 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003283 line = ctxt->input->line;
3284 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003285 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003286 if (ctxt->sax->characters != NULL)
3287 ctxt->sax->characters(ctxt->userData,
3288 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003289 line = ctxt->input->line;
3290 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003291 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003292 }
3293 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003294 if (*in == 0xD) {
3295 in++;
3296 if (*in == 0xA) {
3297 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003298 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003299 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003300 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003301 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003302 in--;
3303 }
3304 if (*in == '<') {
3305 return;
3306 }
3307 if (*in == '&') {
3308 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003309 }
3310 SHRINK;
3311 GROW;
3312 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003313 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003314 nbchar = 0;
3315 }
Daniel Veillard50582112001-03-26 22:52:16 +00003316 ctxt->input->line = line;
3317 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003318 xmlParseCharDataComplex(ctxt, cdata);
3319}
3320
Daniel Veillard01c13b52002-12-10 15:19:08 +00003321/**
3322 * xmlParseCharDataComplex:
3323 * @ctxt: an XML parser context
3324 * @cdata: int indicating whether we are within a CDATA section
3325 *
3326 * parse a CharData section.this is the fallback function
3327 * of xmlParseCharData() when the parsing requires handling
3328 * of non-ASCII characters.
3329 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003330void
3331xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003332 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3333 int nbchar = 0;
3334 int cur, l;
3335 int count = 0;
3336
3337 SHRINK;
3338 GROW;
3339 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003340 while ((cur != '<') && /* checked */
3341 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003342 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003343 if ((cur == ']') && (NXT(1) == ']') &&
3344 (NXT(2) == '>')) {
3345 if (cdata) break;
3346 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003347 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003348 }
3349 }
3350 COPY_BUF(l,buf,nbchar,cur);
3351 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003352 buf[nbchar] = 0;
3353
Owen Taylor3473f882001-02-23 17:55:21 +00003354 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003355 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003356 */
3357 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003358 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003359 if (ctxt->sax->ignorableWhitespace != NULL)
3360 ctxt->sax->ignorableWhitespace(ctxt->userData,
3361 buf, nbchar);
3362 } else {
3363 if (ctxt->sax->characters != NULL)
3364 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3365 }
3366 }
3367 nbchar = 0;
3368 }
3369 count++;
3370 if (count > 50) {
3371 GROW;
3372 count = 0;
3373 }
3374 NEXTL(l);
3375 cur = CUR_CHAR(l);
3376 }
3377 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003378 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003380 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003381 */
3382 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003383 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003384 if (ctxt->sax->ignorableWhitespace != NULL)
3385 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3386 } else {
3387 if (ctxt->sax->characters != NULL)
3388 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3389 }
3390 }
3391 }
3392}
3393
3394/**
3395 * xmlParseExternalID:
3396 * @ctxt: an XML parser context
3397 * @publicID: a xmlChar** receiving PubidLiteral
3398 * @strict: indicate whether we should restrict parsing to only
3399 * production [75], see NOTE below
3400 *
3401 * Parse an External ID or a Public ID
3402 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003403 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003404 * 'PUBLIC' S PubidLiteral S SystemLiteral
3405 *
3406 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3407 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3408 *
3409 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3410 *
3411 * Returns the function returns SystemLiteral and in the second
3412 * case publicID receives PubidLiteral, is strict is off
3413 * it is possible to return NULL and have publicID set.
3414 */
3415
3416xmlChar *
3417xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3418 xmlChar *URI = NULL;
3419
3420 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003421
3422 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003423 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003424 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003425 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3427 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003428 }
3429 SKIP_BLANKS;
3430 URI = xmlParseSystemLiteral(ctxt);
3431 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003432 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003433 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003434 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003435 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003436 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003437 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003438 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003439 }
3440 SKIP_BLANKS;
3441 *publicID = xmlParsePubidLiteral(ctxt);
3442 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003443 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 if (strict) {
3446 /*
3447 * We don't handle [83] so "S SystemLiteral" is required.
3448 */
William M. Brack76e95df2003-10-18 16:20:14 +00003449 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003451 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003452 }
3453 } else {
3454 /*
3455 * We handle [83] so we return immediately, if
3456 * "S SystemLiteral" is not detected. From a purely parsing
3457 * point of view that's a nice mess.
3458 */
3459 const xmlChar *ptr;
3460 GROW;
3461
3462 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003463 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003464
William M. Brack76e95df2003-10-18 16:20:14 +00003465 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003466 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3467 }
3468 SKIP_BLANKS;
3469 URI = xmlParseSystemLiteral(ctxt);
3470 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003471 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003472 }
3473 }
3474 return(URI);
3475}
3476
3477/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003478 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003479 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003480 * @buf: the already parsed part of the buffer
3481 * @len: number of bytes filles in the buffer
3482 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003483 *
3484 * Skip an XML (SGML) comment <!-- .... -->
3485 * The spec says that "For compatibility, the string "--" (double-hyphen)
3486 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003487 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003488 *
3489 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3490 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003491static void
3492xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003493 int q, ql;
3494 int r, rl;
3495 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003496 xmlParserInputPtr input = ctxt->input;
3497 int count = 0;
3498
Owen Taylor3473f882001-02-23 17:55:21 +00003499 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003500 len = 0;
3501 size = XML_PARSER_BUFFER_SIZE;
3502 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3503 if (buf == NULL) {
3504 xmlErrMemory(ctxt, NULL);
3505 return;
3506 }
Owen Taylor3473f882001-02-23 17:55:21 +00003507 }
3508 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003509 if (q == 0)
3510 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 NEXTL(ql);
3512 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003513 if (r == 0)
3514 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003515 NEXTL(rl);
3516 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003517 if (cur == 0)
3518 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003519 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003520 ((cur != '>') ||
3521 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003522 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003523 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003524 }
3525 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003526 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003527 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003528 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3529 if (new_buf == NULL) {
3530 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003531 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003532 return;
3533 }
William M. Bracka3215c72004-07-31 16:24:01 +00003534 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003535 }
3536 COPY_BUF(ql,buf,len,q);
3537 q = r;
3538 ql = rl;
3539 r = cur;
3540 rl = l;
3541
3542 count++;
3543 if (count > 50) {
3544 GROW;
3545 count = 0;
3546 }
3547 NEXTL(l);
3548 cur = CUR_CHAR(l);
3549 if (cur == 0) {
3550 SHRINK;
3551 GROW;
3552 cur = CUR_CHAR(l);
3553 }
3554 }
3555 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003556 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003557 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003559 xmlFree(buf);
3560 } else {
3561 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003562 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3563 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003564 }
3565 NEXT;
3566 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3567 (!ctxt->disableSAX))
3568 ctxt->sax->comment(ctxt->userData, buf);
3569 xmlFree(buf);
3570 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003571 return;
3572not_terminated:
3573 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3574 "Comment not terminated\n", NULL);
3575 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003576}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003577/**
3578 * xmlParseComment:
3579 * @ctxt: an XML parser context
3580 *
3581 * Skip an XML (SGML) comment <!-- .... -->
3582 * The spec says that "For compatibility, the string "--" (double-hyphen)
3583 * must not occur within comments. "
3584 *
3585 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3586 */
3587void
3588xmlParseComment(xmlParserCtxtPtr ctxt) {
3589 xmlChar *buf = NULL;
3590 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003591 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003592 xmlParserInputState state;
3593 const xmlChar *in;
3594 int nbchar = 0, ccol;
3595
3596 /*
3597 * Check that there is a comment right here.
3598 */
3599 if ((RAW != '<') || (NXT(1) != '!') ||
3600 (NXT(2) != '-') || (NXT(3) != '-')) return;
3601
3602 state = ctxt->instate;
3603 ctxt->instate = XML_PARSER_COMMENT;
3604 SKIP(4);
3605 SHRINK;
3606 GROW;
3607
3608 /*
3609 * Accelerated common case where input don't need to be
3610 * modified before passing it to the handler.
3611 */
3612 in = ctxt->input->cur;
3613 do {
3614 if (*in == 0xA) {
3615 ctxt->input->line++; ctxt->input->col = 1;
3616 in++;
3617 while (*in == 0xA) {
3618 ctxt->input->line++; ctxt->input->col = 1;
3619 in++;
3620 }
3621 }
3622get_more:
3623 ccol = ctxt->input->col;
3624 while (((*in > '-') && (*in <= 0x7F)) ||
3625 ((*in >= 0x20) && (*in < '-')) ||
3626 (*in == 0x09)) {
3627 in++;
3628 ccol++;
3629 }
3630 ctxt->input->col = ccol;
3631 if (*in == 0xA) {
3632 ctxt->input->line++; ctxt->input->col = 1;
3633 in++;
3634 while (*in == 0xA) {
3635 ctxt->input->line++; ctxt->input->col = 1;
3636 in++;
3637 }
3638 goto get_more;
3639 }
3640 nbchar = in - ctxt->input->cur;
3641 /*
3642 * save current set of data
3643 */
3644 if (nbchar > 0) {
3645 if ((ctxt->sax != NULL) &&
3646 (ctxt->sax->comment != NULL)) {
3647 if (buf == NULL) {
3648 if ((*in == '-') && (in[1] == '-'))
3649 size = nbchar + 1;
3650 else
3651 size = XML_PARSER_BUFFER_SIZE + nbchar;
3652 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3653 if (buf == NULL) {
3654 xmlErrMemory(ctxt, NULL);
3655 ctxt->instate = state;
3656 return;
3657 }
3658 len = 0;
3659 } else if (len + nbchar + 1 >= size) {
3660 xmlChar *new_buf;
3661 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3662 new_buf = (xmlChar *) xmlRealloc(buf,
3663 size * sizeof(xmlChar));
3664 if (new_buf == NULL) {
3665 xmlFree (buf);
3666 xmlErrMemory(ctxt, NULL);
3667 ctxt->instate = state;
3668 return;
3669 }
3670 buf = new_buf;
3671 }
3672 memcpy(&buf[len], ctxt->input->cur, nbchar);
3673 len += nbchar;
3674 buf[len] = 0;
3675 }
3676 }
3677 ctxt->input->cur = in;
3678 if (*in == 0xA)
3679
3680 if (*in == 0xD) {
3681 in++;
3682 if (*in == 0xA) {
3683 ctxt->input->cur = in;
3684 in++;
3685 ctxt->input->line++; ctxt->input->col = 1;
3686 continue; /* while */
3687 }
3688 in--;
3689 }
3690 SHRINK;
3691 GROW;
3692 in = ctxt->input->cur;
3693 if (*in == '-') {
3694 if (in[1] == '-') {
3695 if (in[2] == '>') {
3696 SKIP(3);
3697 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3698 (!ctxt->disableSAX)) {
3699 if (buf != NULL)
3700 ctxt->sax->comment(ctxt->userData, buf);
3701 else
3702 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3703 }
3704 if (buf != NULL)
3705 xmlFree(buf);
3706 ctxt->instate = state;
3707 return;
3708 }
3709 if (buf != NULL)
3710 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3711 "Comment not terminated \n<!--%.50s\n",
3712 buf);
3713 else
3714 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3715 "Comment not terminated \n", NULL);
3716 in++;
3717 ctxt->input->col++;
3718 }
3719 in++;
3720 ctxt->input->col++;
3721 goto get_more;
3722 }
3723 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3724 xmlParseCommentComplex(ctxt, buf, len, size);
3725 ctxt->instate = state;
3726 return;
3727}
3728
Owen Taylor3473f882001-02-23 17:55:21 +00003729
3730/**
3731 * xmlParsePITarget:
3732 * @ctxt: an XML parser context
3733 *
3734 * parse the name of a PI
3735 *
3736 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3737 *
3738 * Returns the PITarget name or NULL
3739 */
3740
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003741const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003742xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003743 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003744
3745 name = xmlParseName(ctxt);
3746 if ((name != NULL) &&
3747 ((name[0] == 'x') || (name[0] == 'X')) &&
3748 ((name[1] == 'm') || (name[1] == 'M')) &&
3749 ((name[2] == 'l') || (name[2] == 'L'))) {
3750 int i;
3751 if ((name[0] == 'x') && (name[1] == 'm') &&
3752 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003754 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 return(name);
3756 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003757 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003758 return(name);
3759 }
3760 for (i = 0;;i++) {
3761 if (xmlW3CPIs[i] == NULL) break;
3762 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3763 return(name);
3764 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003765 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3766 "xmlParsePITarget: invalid name prefix 'xml'\n",
3767 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003768 }
3769 return(name);
3770}
3771
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL and optional blanks up to
 * the end of the string. A missing '=' makes the function return without
 * any message; every other deviation is reported with the
 * XML_WAR_CATALOG_PI warning and no catalog is added.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* the pseudo attribute name */
    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;

    /* no '=' : give up quietly */
    if (*p != '=') {
        return;
    }
    for (p++; IS_BLANK_CH(*p); p++)
        ;

    /* the quoted value */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3833
Owen Taylor3473f882001-02-23 17:55:21 +00003834/**
3835 * xmlParsePI:
3836 * @ctxt: an XML parser context
3837 *
3838 * parse an XML Processing Instruction.
3839 *
3840 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3841 *
3842 * The processing is transfered to SAX once parsed.
3843 */
3844
3845void
3846xmlParsePI(xmlParserCtxtPtr ctxt) {
3847 xmlChar *buf = NULL;
3848 int len = 0;
3849 int size = XML_PARSER_BUFFER_SIZE;
3850 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003851 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003852 xmlParserInputState state;
3853 int count = 0;
3854
3855 if ((RAW == '<') && (NXT(1) == '?')) {
3856 xmlParserInputPtr input = ctxt->input;
3857 state = ctxt->instate;
3858 ctxt->instate = XML_PARSER_PI;
3859 /*
3860 * this is a Processing Instruction.
3861 */
3862 SKIP(2);
3863 SHRINK;
3864
3865 /*
3866 * Parse the target name and check for special support like
3867 * namespace.
3868 */
3869 target = xmlParsePITarget(ctxt);
3870 if (target != NULL) {
3871 if ((RAW == '?') && (NXT(1) == '>')) {
3872 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003873 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3874 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003875 }
3876 SKIP(2);
3877
3878 /*
3879 * SAX: PI detected.
3880 */
3881 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3882 (ctxt->sax->processingInstruction != NULL))
3883 ctxt->sax->processingInstruction(ctxt->userData,
3884 target, NULL);
3885 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003886 return;
3887 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003888 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003889 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003890 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003891 ctxt->instate = state;
3892 return;
3893 }
3894 cur = CUR;
3895 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003896 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3897 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003898 }
3899 SKIP_BLANKS;
3900 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003901 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003902 ((cur != '?') || (NXT(1) != '>'))) {
3903 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003904 xmlChar *tmp;
3905
Owen Taylor3473f882001-02-23 17:55:21 +00003906 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003907 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3908 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003909 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003910 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003911 ctxt->instate = state;
3912 return;
3913 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003914 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003915 }
3916 count++;
3917 if (count > 50) {
3918 GROW;
3919 count = 0;
3920 }
3921 COPY_BUF(l,buf,len,cur);
3922 NEXTL(l);
3923 cur = CUR_CHAR(l);
3924 if (cur == 0) {
3925 SHRINK;
3926 GROW;
3927 cur = CUR_CHAR(l);
3928 }
3929 }
3930 buf[len] = 0;
3931 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003932 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3933 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003934 } else {
3935 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003936 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3937 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003938 }
3939 SKIP(2);
3940
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003941#ifdef LIBXML_CATALOG_ENABLED
3942 if (((state == XML_PARSER_MISC) ||
3943 (state == XML_PARSER_START)) &&
3944 (xmlStrEqual(target, XML_CATALOG_PI))) {
3945 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3946 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3947 (allow == XML_CATA_ALLOW_ALL))
3948 xmlParseCatalogPI(ctxt, buf);
3949 }
3950#endif
3951
3952
Owen Taylor3473f882001-02-23 17:55:21 +00003953 /*
3954 * SAX: PI detected.
3955 */
3956 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3957 (ctxt->sax->processingInstruction != NULL))
3958 ctxt->sax->processingInstruction(ctxt->userData,
3959 target, buf);
3960 }
3961 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003962 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003963 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003964 }
3965 ctxt->instate = state;
3966 }
3967}
3968
3969/**
3970 * xmlParseNotationDecl:
3971 * @ctxt: an XML parser context
3972 *
3973 * parse a notation declaration
3974 *
3975 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3976 *
3977 * Hence there is actually 3 choices:
3978 * 'PUBLIC' S PubidLiteral
3979 * 'PUBLIC' S PubidLiteral S SystemLiteral
3980 * and 'SYSTEM' S SystemLiteral
3981 *
3982 * See the NOTE on xmlParseExternalID().
3983 */
3984
3985void
3986xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003987 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003988 xmlChar *Pubid;
3989 xmlChar *Systemid;
3990
Daniel Veillarda07050d2003-10-19 14:46:32 +00003991 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003992 xmlParserInputPtr input = ctxt->input;
3993 SHRINK;
3994 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003995 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003996 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3997 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003998 return;
3999 }
4000 SKIP_BLANKS;
4001
Daniel Veillard76d66f42001-05-16 21:05:17 +00004002 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004004 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004005 return;
4006 }
William M. Brack76e95df2003-10-18 16:20:14 +00004007 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004009 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004010 return;
4011 }
4012 SKIP_BLANKS;
4013
4014 /*
4015 * Parse the IDs.
4016 */
4017 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4018 SKIP_BLANKS;
4019
4020 if (RAW == '>') {
4021 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4023 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004024 }
4025 NEXT;
4026 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4027 (ctxt->sax->notationDecl != NULL))
4028 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4029 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004030 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004031 }
Owen Taylor3473f882001-02-23 17:55:21 +00004032 if (Systemid != NULL) xmlFree(Systemid);
4033 if (Pubid != NULL) xmlFree(Pubid);
4034 }
4035}
4036
4037/**
4038 * xmlParseEntityDecl:
4039 * @ctxt: an XML parser context
4040 *
4041 * parse <!ENTITY declarations
4042 *
4043 * [70] EntityDecl ::= GEDecl | PEDecl
4044 *
4045 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4046 *
4047 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4048 *
4049 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4050 *
4051 * [74] PEDef ::= EntityValue | ExternalID
4052 *
4053 * [76] NDataDecl ::= S 'NDATA' S Name
4054 *
4055 * [ VC: Notation Declared ]
4056 * The Name must match the declared name of a notation.
4057 */
4058
4059void
4060xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004061 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004062 xmlChar *value = NULL;
4063 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004064 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 int isParameter = 0;
4066 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004067 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004068
Daniel Veillard4c778d82005-01-23 17:37:44 +00004069 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004070 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004071 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004072 SHRINK;
4073 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004074 skipped = SKIP_BLANKS;
4075 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004076 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4077 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004078 }
Owen Taylor3473f882001-02-23 17:55:21 +00004079
4080 if (RAW == '%') {
4081 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004082 skipped = SKIP_BLANKS;
4083 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4085 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004086 }
Owen Taylor3473f882001-02-23 17:55:21 +00004087 isParameter = 1;
4088 }
4089
Daniel Veillard76d66f42001-05-16 21:05:17 +00004090 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004091 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004092 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4093 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004094 return;
4095 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004096 skipped = SKIP_BLANKS;
4097 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004098 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4099 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004100 }
Owen Taylor3473f882001-02-23 17:55:21 +00004101
Daniel Veillardf5582f12002-06-11 10:08:16 +00004102 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004103 /*
4104 * handle the various case of definitions...
4105 */
4106 if (isParameter) {
4107 if ((RAW == '"') || (RAW == '\'')) {
4108 value = xmlParseEntityValue(ctxt, &orig);
4109 if (value) {
4110 if ((ctxt->sax != NULL) &&
4111 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4112 ctxt->sax->entityDecl(ctxt->userData, name,
4113 XML_INTERNAL_PARAMETER_ENTITY,
4114 NULL, NULL, value);
4115 }
4116 } else {
4117 URI = xmlParseExternalID(ctxt, &literal, 1);
4118 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004119 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004120 }
4121 if (URI) {
4122 xmlURIPtr uri;
4123
4124 uri = xmlParseURI((const char *) URI);
4125 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004126 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4127 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004128 /*
4129 * This really ought to be a well formedness error
4130 * but the XML Core WG decided otherwise c.f. issue
4131 * E26 of the XML erratas.
4132 */
Owen Taylor3473f882001-02-23 17:55:21 +00004133 } else {
4134 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004135 /*
4136 * Okay this is foolish to block those but not
4137 * invalid URIs.
4138 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004139 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004140 } else {
4141 if ((ctxt->sax != NULL) &&
4142 (!ctxt->disableSAX) &&
4143 (ctxt->sax->entityDecl != NULL))
4144 ctxt->sax->entityDecl(ctxt->userData, name,
4145 XML_EXTERNAL_PARAMETER_ENTITY,
4146 literal, URI, NULL);
4147 }
4148 xmlFreeURI(uri);
4149 }
4150 }
4151 }
4152 } else {
4153 if ((RAW == '"') || (RAW == '\'')) {
4154 value = xmlParseEntityValue(ctxt, &orig);
4155 if ((ctxt->sax != NULL) &&
4156 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4157 ctxt->sax->entityDecl(ctxt->userData, name,
4158 XML_INTERNAL_GENERAL_ENTITY,
4159 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004160 /*
4161 * For expat compatibility in SAX mode.
4162 */
4163 if ((ctxt->myDoc == NULL) ||
4164 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4165 if (ctxt->myDoc == NULL) {
4166 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4167 }
4168 if (ctxt->myDoc->intSubset == NULL)
4169 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4170 BAD_CAST "fake", NULL, NULL);
4171
Daniel Veillard1af9a412003-08-20 22:54:39 +00004172 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4173 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004174 }
Owen Taylor3473f882001-02-23 17:55:21 +00004175 } else {
4176 URI = xmlParseExternalID(ctxt, &literal, 1);
4177 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004178 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004179 }
4180 if (URI) {
4181 xmlURIPtr uri;
4182
4183 uri = xmlParseURI((const char *)URI);
4184 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004185 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4186 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004187 /*
4188 * This really ought to be a well formedness error
4189 * but the XML Core WG decided otherwise c.f. issue
4190 * E26 of the XML erratas.
4191 */
Owen Taylor3473f882001-02-23 17:55:21 +00004192 } else {
4193 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004194 /*
4195 * Okay this is foolish to block those but not
4196 * invalid URIs.
4197 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004198 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
4200 xmlFreeURI(uri);
4201 }
4202 }
William M. Brack76e95df2003-10-18 16:20:14 +00004203 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004204 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4205 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004206 }
4207 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004208 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004209 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004210 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004211 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4212 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004213 }
4214 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004215 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004216 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4217 (ctxt->sax->unparsedEntityDecl != NULL))
4218 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4219 literal, URI, ndata);
4220 } else {
4221 if ((ctxt->sax != NULL) &&
4222 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4223 ctxt->sax->entityDecl(ctxt->userData, name,
4224 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4225 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004226 /*
4227 * For expat compatibility in SAX mode.
4228 * assuming the entity repalcement was asked for
4229 */
4230 if ((ctxt->replaceEntities != 0) &&
4231 ((ctxt->myDoc == NULL) ||
4232 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4233 if (ctxt->myDoc == NULL) {
4234 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4235 }
4236
4237 if (ctxt->myDoc->intSubset == NULL)
4238 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4239 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004240 xmlSAX2EntityDecl(ctxt, name,
4241 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4242 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004243 }
Owen Taylor3473f882001-02-23 17:55:21 +00004244 }
4245 }
4246 }
4247 SKIP_BLANKS;
4248 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004249 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004250 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004251 } else {
4252 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004253 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4254 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004255 }
4256 NEXT;
4257 }
4258 if (orig != NULL) {
4259 /*
4260 * Ugly mechanism to save the raw entity value.
4261 */
4262 xmlEntityPtr cur = NULL;
4263
4264 if (isParameter) {
4265 if ((ctxt->sax != NULL) &&
4266 (ctxt->sax->getParameterEntity != NULL))
4267 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4268 } else {
4269 if ((ctxt->sax != NULL) &&
4270 (ctxt->sax->getEntity != NULL))
4271 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004272 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004273 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004274 }
Owen Taylor3473f882001-02-23 17:55:21 +00004275 }
4276 if (cur != NULL) {
4277 if (cur->orig != NULL)
4278 xmlFree(orig);
4279 else
4280 cur->orig = orig;
4281 } else
4282 xmlFree(orig);
4283 }
Owen Taylor3473f882001-02-23 17:55:21 +00004284 if (value != NULL) xmlFree(value);
4285 if (URI != NULL) xmlFree(URI);
4286 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004287 }
4288}
4289
4290/**
4291 * xmlParseDefaultDecl:
4292 * @ctxt: an XML parser context
4293 * @value: Receive a possible fixed default value for the attribute
4294 *
4295 * Parse an attribute default declaration
4296 *
4297 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4298 *
4299 * [ VC: Required Attribute ]
4300 * if the default declaration is the keyword #REQUIRED, then the
4301 * attribute must be specified for all elements of the type in the
4302 * attribute-list declaration.
4303 *
4304 * [ VC: Attribute Default Legal ]
4305 * The declared default value must meet the lexical constraints of
4306 * the declared attribute type c.f. xmlValidateAttributeDecl()
4307 *
4308 * [ VC: Fixed Attribute Default ]
4309 * if an attribute has a default value declared with the #FIXED
4310 * keyword, instances of that attribute must match the default value.
4311 *
4312 * [ WFC: No < in Attribute Values ]
4313 * handled in xmlParseAttValue()
4314 *
4315 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4316 * or XML_ATTRIBUTE_FIXED.
4317 */
4318
4319int
4320xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4321 int val;
4322 xmlChar *ret;
4323
4324 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004325 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004326 SKIP(9);
4327 return(XML_ATTRIBUTE_REQUIRED);
4328 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004329 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004330 SKIP(8);
4331 return(XML_ATTRIBUTE_IMPLIED);
4332 }
4333 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004334 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004335 SKIP(6);
4336 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004337 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4339 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004340 }
4341 SKIP_BLANKS;
4342 }
4343 ret = xmlParseAttValue(ctxt);
4344 ctxt->instate = XML_PARSER_DTD;
4345 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004346 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004347 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004348 } else
4349 *value = ret;
4350 return(val);
4351}
4352
4353/**
4354 * xmlParseNotationType:
4355 * @ctxt: an XML parser context
4356 *
4357 * parse an Notation attribute type.
4358 *
4359 * Note: the leading 'NOTATION' S part has already being parsed...
4360 *
4361 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4362 *
4363 * [ VC: Notation Attributes ]
4364 * Values of this type must match one of the notation names included
4365 * in the declaration; all notation names in the declaration must be declared.
4366 *
4367 * Returns: the notation attribute tree built while parsing
4368 */
4369
4370xmlEnumerationPtr
4371xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004372 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4374
4375 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004376 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004377 return(NULL);
4378 }
4379 SHRINK;
4380 do {
4381 NEXT;
4382 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004383 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004384 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004385 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4386 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004387 return(ret);
4388 }
4389 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (cur == NULL) return(ret);
4391 if (last == NULL) ret = last = cur;
4392 else {
4393 last->next = cur;
4394 last = cur;
4395 }
4396 SKIP_BLANKS;
4397 } while (RAW == '|');
4398 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004399 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004400 if ((last != NULL) && (last != ret))
4401 xmlFreeEnumeration(last);
4402 return(ret);
4403 }
4404 NEXT;
4405 return(ret);
4406}
4407
4408/**
4409 * xmlParseEnumerationType:
4410 * @ctxt: an XML parser context
4411 *
4412 * parse an Enumeration attribute type.
4413 *
4414 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4415 *
4416 * [ VC: Enumeration ]
4417 * Values of this type must match one of the Nmtoken tokens in
4418 * the declaration
4419 *
4420 * Returns: the enumeration attribute tree built while parsing
4421 */
4422
4423xmlEnumerationPtr
4424xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4425 xmlChar *name;
4426 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4427
4428 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004429 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004430 return(NULL);
4431 }
4432 SHRINK;
4433 do {
4434 NEXT;
4435 SKIP_BLANKS;
4436 name = xmlParseNmtoken(ctxt);
4437 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 return(ret);
4440 }
4441 cur = xmlCreateEnumeration(name);
4442 xmlFree(name);
4443 if (cur == NULL) return(ret);
4444 if (last == NULL) ret = last = cur;
4445 else {
4446 last->next = cur;
4447 last = cur;
4448 }
4449 SKIP_BLANKS;
4450 } while (RAW == '|');
4451 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004452 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004453 return(ret);
4454 }
4455 NEXT;
4456 return(ret);
4457}
4458
4459/**
4460 * xmlParseEnumeratedType:
4461 * @ctxt: an XML parser context
4462 * @tree: the enumeration tree built while parsing
4463 *
4464 * parse an Enumerated attribute type.
4465 *
4466 * [57] EnumeratedType ::= NotationType | Enumeration
4467 *
4468 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4469 *
4470 *
4471 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4472 */
4473
4474int
4475xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004476 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004477 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004478 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004479 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4480 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004481 return(0);
4482 }
4483 SKIP_BLANKS;
4484 *tree = xmlParseNotationType(ctxt);
4485 if (*tree == NULL) return(0);
4486 return(XML_ATTRIBUTE_NOTATION);
4487 }
4488 *tree = xmlParseEnumerationType(ctxt);
4489 if (*tree == NULL) return(0);
4490 return(XML_ATTRIBUTE_ENUMERATION);
4491}
4492
4493/**
4494 * xmlParseAttributeType:
4495 * @ctxt: an XML parser context
4496 * @tree: the enumeration tree built while parsing
4497 *
4498 * parse the Attribute list def for an element
4499 *
4500 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4501 *
4502 * [55] StringType ::= 'CDATA'
4503 *
4504 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4505 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4506 *
4507 * Validity constraints for attribute values syntax are checked in
4508 * xmlValidateAttributeValue()
4509 *
4510 * [ VC: ID ]
4511 * Values of type ID must match the Name production. A name must not
4512 * appear more than once in an XML document as a value of this type;
4513 * i.e., ID values must uniquely identify the elements which bear them.
4514 *
4515 * [ VC: One ID per Element Type ]
4516 * No element type may have more than one ID attribute specified.
4517 *
4518 * [ VC: ID Attribute Default ]
4519 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4520 *
4521 * [ VC: IDREF ]
4522 * Values of type IDREF must match the Name production, and values
4523 * of type IDREFS must match Names; each IDREF Name must match the value
4524 * of an ID attribute on some element in the XML document; i.e. IDREF
4525 * values must match the value of some ID attribute.
4526 *
4527 * [ VC: Entity Name ]
4528 * Values of type ENTITY must match the Name production, values
4529 * of type ENTITIES must match Names; each Entity Name must match the
4530 * name of an unparsed entity declared in the DTD.
4531 *
4532 * [ VC: Name Token ]
4533 * Values of type NMTOKEN must match the Nmtoken production; values
4534 * of type NMTOKENS must match Nmtokens.
4535 *
4536 * Returns the attribute type
4537 */
4538int
4539xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4540 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004541 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004542 SKIP(5);
4543 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004544 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004545 SKIP(6);
4546 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004547 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004548 SKIP(5);
4549 return(XML_ATTRIBUTE_IDREF);
4550 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4551 SKIP(2);
4552 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004553 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004554 SKIP(6);
4555 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004556 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004557 SKIP(8);
4558 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004559 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004560 SKIP(8);
4561 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004562 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004563 SKIP(7);
4564 return(XML_ATTRIBUTE_NMTOKEN);
4565 }
4566 return(xmlParseEnumeratedType(ctxt, tree));
4567}
4568
4569/**
4570 * xmlParseAttributeListDecl:
4571 * @ctxt: an XML parser context
4572 *
4573 * : parse the Attribute list def for an element
4574 *
4575 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4576 *
4577 * [53] AttDef ::= S Name S AttType S DefaultDecl
4578 *
4579 */
4580void
4581xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004582 const xmlChar *elemName;
4583 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004584 xmlEnumerationPtr tree;
4585
Daniel Veillarda07050d2003-10-19 14:46:32 +00004586 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004587 xmlParserInputPtr input = ctxt->input;
4588
4589 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004590 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004591 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004592 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004593 }
4594 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004595 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004596 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004597 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4598 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004599 return;
4600 }
4601 SKIP_BLANKS;
4602 GROW;
4603 while (RAW != '>') {
4604 const xmlChar *check = CUR_PTR;
4605 int type;
4606 int def;
4607 xmlChar *defaultValue = NULL;
4608
4609 GROW;
4610 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004611 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004612 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004613 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4614 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004615 break;
4616 }
4617 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004618 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004619 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004620 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004621 if (defaultValue != NULL)
4622 xmlFree(defaultValue);
4623 break;
4624 }
4625 SKIP_BLANKS;
4626
4627 type = xmlParseAttributeType(ctxt, &tree);
4628 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004629 if (defaultValue != NULL)
4630 xmlFree(defaultValue);
4631 break;
4632 }
4633
4634 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004635 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004636 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4637 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004638 if (defaultValue != NULL)
4639 xmlFree(defaultValue);
4640 if (tree != NULL)
4641 xmlFreeEnumeration(tree);
4642 break;
4643 }
4644 SKIP_BLANKS;
4645
4646 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4647 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004648 if (defaultValue != NULL)
4649 xmlFree(defaultValue);
4650 if (tree != NULL)
4651 xmlFreeEnumeration(tree);
4652 break;
4653 }
4654
4655 GROW;
4656 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004657 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004658 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004659 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004660 if (defaultValue != NULL)
4661 xmlFree(defaultValue);
4662 if (tree != NULL)
4663 xmlFreeEnumeration(tree);
4664 break;
4665 }
4666 SKIP_BLANKS;
4667 }
4668 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004669 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4670 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004671 if (defaultValue != NULL)
4672 xmlFree(defaultValue);
4673 if (tree != NULL)
4674 xmlFreeEnumeration(tree);
4675 break;
4676 }
4677 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4678 (ctxt->sax->attributeDecl != NULL))
4679 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4680 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004681 else if (tree != NULL)
4682 xmlFreeEnumeration(tree);
4683
4684 if ((ctxt->sax2) && (defaultValue != NULL) &&
4685 (def != XML_ATTRIBUTE_IMPLIED) &&
4686 (def != XML_ATTRIBUTE_REQUIRED)) {
4687 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4688 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004689 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4690 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4691 }
Owen Taylor3473f882001-02-23 17:55:21 +00004692 if (defaultValue != NULL)
4693 xmlFree(defaultValue);
4694 GROW;
4695 }
4696 if (RAW == '>') {
4697 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004698 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4699 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004700 }
4701 NEXT;
4702 }
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
4704}
4705
4706/**
4707 * xmlParseElementMixedContentDecl:
4708 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004709 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004710 *
4711 * parse the declaration for a Mixed Element content
4712 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4713 *
4714 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4715 * '(' S? '#PCDATA' S? ')'
4716 *
4717 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4718 *
4719 * [ VC: No Duplicate Types ]
4720 * The same name must not appear more than once in a single
4721 * mixed-content declaration.
4722 *
4723 * returns: the list of the xmlElementContentPtr describing the element choices
4724 */
4725xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004726xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004727 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004728 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004729
4730 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004731 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004732 SKIP(7);
4733 SKIP_BLANKS;
4734 SHRINK;
4735 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004736 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004737 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4738"Element content declaration doesn't start and stop in the same entity\n",
4739 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004740 }
Owen Taylor3473f882001-02-23 17:55:21 +00004741 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004742 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 if (RAW == '*') {
4744 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4745 NEXT;
4746 }
4747 return(ret);
4748 }
4749 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004750 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004751 if (ret == NULL) return(NULL);
4752 }
4753 while (RAW == '|') {
4754 NEXT;
4755 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004756 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004757 if (ret == NULL) return(NULL);
4758 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004759 if (cur != NULL)
4760 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004761 cur = ret;
4762 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004763 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004764 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004765 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004766 if (n->c1 != NULL)
4767 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004768 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004769 if (n != NULL)
4770 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004772 }
4773 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004774 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004775 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004776 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004777 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004778 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004779 return(NULL);
4780 }
4781 SKIP_BLANKS;
4782 GROW;
4783 }
4784 if ((RAW == ')') && (NXT(1) == '*')) {
4785 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004786 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00004787 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004788 if (cur->c2 != NULL)
4789 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004790 }
4791 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004792 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004793 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4794"Element content declaration doesn't start and stop in the same entity\n",
4795 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004796 }
Owen Taylor3473f882001-02-23 17:55:21 +00004797 SKIP(2);
4798 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004799 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004800 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004801 return(NULL);
4802 }
4803
4804 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004805 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004806 }
4807 return(ret);
4808}
4809
4810/**
4811 * xmlParseElementChildrenContentDecl:
4812 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004813 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004814 *
4815 * parse the declaration for a Mixed Element content
4816 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4817 *
4818 *
4819 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4820 *
4821 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4822 *
4823 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4824 *
4825 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4826 *
4827 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4828 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004829 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004830 * opening or closing parentheses in a choice, seq, or Mixed
4831 * construct is contained in the replacement text for a parameter
4832 * entity, both must be contained in the same replacement text. For
4833 * interoperability, if a parameter-entity reference appears in a
4834 * choice, seq, or Mixed construct, its replacement text should not
4835 * be empty, and neither the first nor last non-blank character of
4836 * the replacement text should be a connector (| or ,).
4837 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004838 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004839 * hierarchy.
4840 */
4841xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004842xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004843 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004844 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004845 xmlChar type = 0;
4846
4847 SKIP_BLANKS;
4848 GROW;
4849 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004850 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004851
Owen Taylor3473f882001-02-23 17:55:21 +00004852 /* Recurse on first child */
4853 NEXT;
4854 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP_BLANKS;
4857 GROW;
4858 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004859 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004861 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004862 return(NULL);
4863 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004864 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004865 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004866 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004867 return(NULL);
4868 }
Owen Taylor3473f882001-02-23 17:55:21 +00004869 GROW;
4870 if (RAW == '?') {
4871 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4872 NEXT;
4873 } else if (RAW == '*') {
4874 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4875 NEXT;
4876 } else if (RAW == '+') {
4877 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4878 NEXT;
4879 } else {
4880 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4881 }
Owen Taylor3473f882001-02-23 17:55:21 +00004882 GROW;
4883 }
4884 SKIP_BLANKS;
4885 SHRINK;
4886 while (RAW != ')') {
4887 /*
4888 * Each loop we parse one separator and one element.
4889 */
4890 if (RAW == ',') {
4891 if (type == 0) type = CUR;
4892
4893 /*
4894 * Detect "Name | Name , Name" error
4895 */
4896 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004897 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004898 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004900 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004901 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004902 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004903 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004904 return(NULL);
4905 }
4906 NEXT;
4907
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004908 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00004909 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004910 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004911 xmlFreeDocElementContent(ctxt->myDoc, last);
4912 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004913 return(NULL);
4914 }
4915 if (last == NULL) {
4916 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004917 if (ret != NULL)
4918 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004919 ret = cur = op;
4920 } else {
4921 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004922 if (op != NULL)
4923 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004924 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004925 if (last != NULL)
4926 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004927 cur =op;
4928 last = NULL;
4929 }
4930 } else if (RAW == '|') {
4931 if (type == 0) type = CUR;
4932
4933 /*
4934 * Detect "Name , Name | Name" error
4935 */
4936 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004937 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004938 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004939 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004940 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004941 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004942 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004943 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004944 return(NULL);
4945 }
4946 NEXT;
4947
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004948 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004949 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004950 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004951 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004952 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004953 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004954 return(NULL);
4955 }
4956 if (last == NULL) {
4957 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004958 if (ret != NULL)
4959 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004960 ret = cur = op;
4961 } else {
4962 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004963 if (op != NULL)
4964 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004965 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004966 if (last != NULL)
4967 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004968 cur =op;
4969 last = NULL;
4970 }
4971 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004972 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004973 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004974 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004975 return(NULL);
4976 }
4977 GROW;
4978 SKIP_BLANKS;
4979 GROW;
4980 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004981 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004982 /* Recurse on second child */
4983 NEXT;
4984 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004985 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004986 SKIP_BLANKS;
4987 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004988 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004989 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004990 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004992 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004993 return(NULL);
4994 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004995 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004996 if (RAW == '?') {
4997 last->ocur = XML_ELEMENT_CONTENT_OPT;
4998 NEXT;
4999 } else if (RAW == '*') {
5000 last->ocur = XML_ELEMENT_CONTENT_MULT;
5001 NEXT;
5002 } else if (RAW == '+') {
5003 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5004 NEXT;
5005 } else {
5006 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5007 }
5008 }
5009 SKIP_BLANKS;
5010 GROW;
5011 }
5012 if ((cur != NULL) && (last != NULL)) {
5013 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005014 if (last != NULL)
5015 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005017 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005018 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5019"Element content declaration doesn't start and stop in the same entity\n",
5020 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005021 }
Owen Taylor3473f882001-02-23 17:55:21 +00005022 NEXT;
5023 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005024 if (ret != NULL) {
5025 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5026 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5027 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5028 else
5029 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5030 }
Owen Taylor3473f882001-02-23 17:55:21 +00005031 NEXT;
5032 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005033 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005034 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005035 cur = ret;
5036 /*
5037 * Some normalization:
5038 * (a | b* | c?)* == (a | b | c)*
5039 */
5040 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5041 if ((cur->c1 != NULL) &&
5042 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5043 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5044 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5045 if ((cur->c2 != NULL) &&
5046 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5047 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5048 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5049 cur = cur->c2;
5050 }
5051 }
Owen Taylor3473f882001-02-23 17:55:21 +00005052 NEXT;
5053 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005054 if (ret != NULL) {
5055 int found = 0;
5056
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005057 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5058 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5059 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005060 else
5061 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005062 /*
5063 * Some normalization:
5064 * (a | b*)+ == (a | b)*
5065 * (a | b?)+ == (a | b)*
5066 */
5067 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5068 if ((cur->c1 != NULL) &&
5069 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5070 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5071 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5072 found = 1;
5073 }
5074 if ((cur->c2 != NULL) &&
5075 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5076 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5077 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5078 found = 1;
5079 }
5080 cur = cur->c2;
5081 }
5082 if (found)
5083 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5084 }
Owen Taylor3473f882001-02-23 17:55:21 +00005085 NEXT;
5086 }
5087 return(ret);
5088}
5089
5090/**
5091 * xmlParseElementContentDecl:
5092 * @ctxt: an XML parser context
5093 * @name: the name of the element being defined.
5094 * @result: the Element Content pointer will be stored here if any
5095 *
5096 * parse the declaration for an Element content either Mixed or Children,
5097 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5098 *
5099 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5100 *
5101 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5102 */
5103
5104int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005105xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005106 xmlElementContentPtr *result) {
5107
5108 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005109 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005110 int res;
5111
5112 *result = NULL;
5113
5114 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005115 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005116 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005117 return(-1);
5118 }
5119 NEXT;
5120 GROW;
5121 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005122 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005123 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005124 res = XML_ELEMENT_TYPE_MIXED;
5125 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005126 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 res = XML_ELEMENT_TYPE_ELEMENT;
5128 }
Owen Taylor3473f882001-02-23 17:55:21 +00005129 SKIP_BLANKS;
5130 *result = tree;
5131 return(res);
5132}
5133
5134/**
5135 * xmlParseElementDecl:
5136 * @ctxt: an XML parser context
5137 *
5138 * parse an Element declaration.
5139 *
5140 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5141 *
5142 * [ VC: Unique Element Type Declaration ]
5143 * No element type may be declared more than once
5144 *
5145 * Returns the type of the element, or -1 in case of error
5146 */
5147int
5148xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005149 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005150 int ret = -1;
5151 xmlElementContentPtr content = NULL;
5152
Daniel Veillard4c778d82005-01-23 17:37:44 +00005153 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005154 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005155 xmlParserInputPtr input = ctxt->input;
5156
5157 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005158 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5160 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005161 }
5162 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005163 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005164 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005165 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5166 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005167 return(-1);
5168 }
5169 while ((RAW == 0) && (ctxt->inputNr > 1))
5170 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005171 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005172 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5173 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005174 }
5175 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005176 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005177 SKIP(5);
5178 /*
5179 * Element must always be empty.
5180 */
5181 ret = XML_ELEMENT_TYPE_EMPTY;
5182 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5183 (NXT(2) == 'Y')) {
5184 SKIP(3);
5185 /*
5186 * Element is a generic container.
5187 */
5188 ret = XML_ELEMENT_TYPE_ANY;
5189 } else if (RAW == '(') {
5190 ret = xmlParseElementContentDecl(ctxt, name, &content);
5191 } else {
5192 /*
5193 * [ WFC: PEs in Internal Subset ] error handling.
5194 */
5195 if ((RAW == '%') && (ctxt->external == 0) &&
5196 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005197 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005198 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005199 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005200 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005201 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5202 }
Owen Taylor3473f882001-02-23 17:55:21 +00005203 return(-1);
5204 }
5205
5206 SKIP_BLANKS;
5207 /*
5208 * Pop-up of finished entities.
5209 */
5210 while ((RAW == 0) && (ctxt->inputNr > 1))
5211 xmlPopInput(ctxt);
5212 SKIP_BLANKS;
5213
5214 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005215 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005216 if (content != NULL) {
5217 xmlFreeDocElementContent(ctxt->myDoc, content);
5218 }
Owen Taylor3473f882001-02-23 17:55:21 +00005219 } else {
5220 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005221 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5222 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005223 }
5224
5225 NEXT;
5226 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005227 (ctxt->sax->elementDecl != NULL)) {
5228 if (content != NULL)
5229 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005230 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5231 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005232 if ((content != NULL) && (content->parent == NULL)) {
5233 /*
5234 * this is a trick: if xmlAddElementDecl is called,
5235 * instead of copying the full tree it is plugged directly
5236 * if called from the parser. Avoid duplicating the
5237 * interfaces or change the API/ABI
5238 */
5239 xmlFreeDocElementContent(ctxt->myDoc, content);
5240 }
5241 } else if (content != NULL) {
5242 xmlFreeDocElementContent(ctxt->myDoc, content);
5243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 }
Owen Taylor3473f882001-02-23 17:55:21 +00005245 }
5246 return(ret);
5247}
5248
5249/**
Owen Taylor3473f882001-02-23 17:55:21 +00005250 * xmlParseConditionalSections
5251 * @ctxt: an XML parser context
5252 *
5253 * [61] conditionalSect ::= includeSect | ignoreSect
5254 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5255 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5256 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5257 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5258 */
5259
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005260static void
Owen Taylor3473f882001-02-23 17:55:21 +00005261xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5262 SKIP(3);
5263 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005265 SKIP(7);
5266 SKIP_BLANKS;
5267 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005268 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005269 } else {
5270 NEXT;
5271 }
5272 if (xmlParserDebugEntities) {
5273 if ((ctxt->input != NULL) && (ctxt->input->filename))
5274 xmlGenericError(xmlGenericErrorContext,
5275 "%s(%d): ", ctxt->input->filename,
5276 ctxt->input->line);
5277 xmlGenericError(xmlGenericErrorContext,
5278 "Entering INCLUDE Conditional Section\n");
5279 }
5280
5281 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5282 (NXT(2) != '>'))) {
5283 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005284 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005285
5286 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5287 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005288 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005289 NEXT;
5290 } else if (RAW == '%') {
5291 xmlParsePEReference(ctxt);
5292 } else
5293 xmlParseMarkupDecl(ctxt);
5294
5295 /*
5296 * Pop-up of finished entities.
5297 */
5298 while ((RAW == 0) && (ctxt->inputNr > 1))
5299 xmlPopInput(ctxt);
5300
Daniel Veillardfdc91562002-07-01 21:52:03 +00005301 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005302 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 break;
5304 }
5305 }
5306 if (xmlParserDebugEntities) {
5307 if ((ctxt->input != NULL) && (ctxt->input->filename))
5308 xmlGenericError(xmlGenericErrorContext,
5309 "%s(%d): ", ctxt->input->filename,
5310 ctxt->input->line);
5311 xmlGenericError(xmlGenericErrorContext,
5312 "Leaving INCLUDE Conditional Section\n");
5313 }
5314
Daniel Veillarda07050d2003-10-19 14:46:32 +00005315 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005316 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005317 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005318 int depth = 0;
5319
5320 SKIP(6);
5321 SKIP_BLANKS;
5322 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005323 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005324 } else {
5325 NEXT;
5326 }
5327 if (xmlParserDebugEntities) {
5328 if ((ctxt->input != NULL) && (ctxt->input->filename))
5329 xmlGenericError(xmlGenericErrorContext,
5330 "%s(%d): ", ctxt->input->filename,
5331 ctxt->input->line);
5332 xmlGenericError(xmlGenericErrorContext,
5333 "Entering IGNORE Conditional Section\n");
5334 }
5335
5336 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005337 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005338 * But disable SAX event generating DTD building in the meantime
5339 */
5340 state = ctxt->disableSAX;
5341 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005342 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005343 ctxt->instate = XML_PARSER_IGNORE;
5344
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005345 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005346 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5347 depth++;
5348 SKIP(3);
5349 continue;
5350 }
5351 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5352 if (--depth >= 0) SKIP(3);
5353 continue;
5354 }
5355 NEXT;
5356 continue;
5357 }
5358
5359 ctxt->disableSAX = state;
5360 ctxt->instate = instate;
5361
5362 if (xmlParserDebugEntities) {
5363 if ((ctxt->input != NULL) && (ctxt->input->filename))
5364 xmlGenericError(xmlGenericErrorContext,
5365 "%s(%d): ", ctxt->input->filename,
5366 ctxt->input->line);
5367 xmlGenericError(xmlGenericErrorContext,
5368 "Leaving IGNORE Conditional Section\n");
5369 }
5370
5371 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005372 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005373 }
5374
5375 if (RAW == 0)
5376 SHRINK;
5377
5378 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005379 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 } else {
5381 SKIP(3);
5382 }
5383}
5384
5385/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005386 * xmlParseMarkupDecl:
5387 * @ctxt: an XML parser context
5388 *
5389 * parse Markup declarations
5390 *
5391 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5392 * NotationDecl | PI | Comment
5393 *
5394 * [ VC: Proper Declaration/PE Nesting ]
5395 * Parameter-entity replacement text must be properly nested with
5396 * markup declarations. That is to say, if either the first character
5397 * or the last character of a markup declaration (markupdecl above) is
5398 * contained in the replacement text for a parameter-entity reference,
5399 * both must be contained in the same replacement text.
5400 *
5401 * [ WFC: PEs in Internal Subset ]
5402 * In the internal DTD subset, parameter-entity references can occur
5403 * only where markup declarations can occur, not within markup declarations.
5404 * (This does not apply to references that occur in external parameter
5405 * entities or to the external subset.)
5406 */
5407void
5408xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5409 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005410 if (CUR == '<') {
5411 if (NXT(1) == '!') {
5412 switch (NXT(2)) {
5413 case 'E':
5414 if (NXT(3) == 'L')
5415 xmlParseElementDecl(ctxt);
5416 else if (NXT(3) == 'N')
5417 xmlParseEntityDecl(ctxt);
5418 break;
5419 case 'A':
5420 xmlParseAttributeListDecl(ctxt);
5421 break;
5422 case 'N':
5423 xmlParseNotationDecl(ctxt);
5424 break;
5425 case '-':
5426 xmlParseComment(ctxt);
5427 break;
5428 default:
5429 /* there is an error but it will be detected later */
5430 break;
5431 }
5432 } else if (NXT(1) == '?') {
5433 xmlParsePI(ctxt);
5434 }
5435 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005436 /*
5437 * This is only for internal subset. On external entities,
5438 * the replacement is done before parsing stage
5439 */
5440 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5441 xmlParsePEReference(ctxt);
5442
5443 /*
5444 * Conditional sections are allowed from entities included
5445 * by PE References in the internal subset.
5446 */
5447 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5448 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5449 xmlParseConditionalSections(ctxt);
5450 }
5451 }
5452
5453 ctxt->instate = XML_PARSER_DTD;
5454}
5455
5456/**
5457 * xmlParseTextDecl:
5458 * @ctxt: an XML parser context
5459 *
5460 * parse an XML declaration header for external entities
5461 *
5462 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5463 *
5464 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5465 */
5466
5467void
5468xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5469 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005470 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005471
5472 /*
5473 * We know that '<?xml' is here.
5474 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005475 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005476 SKIP(5);
5477 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005478 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005479 return;
5480 }
5481
William M. Brack76e95df2003-10-18 16:20:14 +00005482 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5484 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005485 }
5486 SKIP_BLANKS;
5487
5488 /*
5489 * We may have the VersionInfo here.
5490 */
5491 version = xmlParseVersionInfo(ctxt);
5492 if (version == NULL)
5493 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005494 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005495 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5497 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005498 }
5499 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005500 ctxt->input->version = version;
5501
5502 /*
5503 * We must have the encoding declaration
5504 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005505 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005506 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5507 /*
5508 * The XML REC instructs us to stop parsing right here
5509 */
5510 return;
5511 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005512 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5513 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5514 "Missing encoding in text declaration\n");
5515 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005516
5517 SKIP_BLANKS;
5518 if ((RAW == '?') && (NXT(1) == '>')) {
5519 SKIP(2);
5520 } else if (RAW == '>') {
5521 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005522 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005523 NEXT;
5524 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005525 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005526 MOVETO_ENDTAG(CUR_PTR);
5527 NEXT;
5528 }
5529}
5530
5531/**
Owen Taylor3473f882001-02-23 17:55:21 +00005532 * xmlParseExternalSubset:
5533 * @ctxt: an XML parser context
5534 * @ExternalID: the external identifier
5535 * @SystemID: the system identifier (or URL)
5536 *
5537 * parse Markup declarations from an external subset
5538 *
5539 * [30] extSubset ::= textDecl? extSubsetDecl
5540 *
5541 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5542 */
5543void
5544xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5545 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005546 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005548 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005549 xmlParseTextDecl(ctxt);
5550 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5551 /*
5552 * The XML REC instructs us to stop parsing right here
5553 */
5554 ctxt->instate = XML_PARSER_EOF;
5555 return;
5556 }
5557 }
5558 if (ctxt->myDoc == NULL) {
5559 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5560 }
5561 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5562 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5563
5564 ctxt->instate = XML_PARSER_DTD;
5565 ctxt->external = 1;
5566 while (((RAW == '<') && (NXT(1) == '?')) ||
5567 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005568 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005569 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005570 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005571
5572 GROW;
5573 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5574 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005575 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005576 NEXT;
5577 } else if (RAW == '%') {
5578 xmlParsePEReference(ctxt);
5579 } else
5580 xmlParseMarkupDecl(ctxt);
5581
5582 /*
5583 * Pop-up of finished entities.
5584 */
5585 while ((RAW == 0) && (ctxt->inputNr > 1))
5586 xmlPopInput(ctxt);
5587
Daniel Veillardfdc91562002-07-01 21:52:03 +00005588 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005589 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005590 break;
5591 }
5592 }
5593
5594 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005595 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005596 }
5597
5598}
5599
5600/**
5601 * xmlParseReference:
5602 * @ctxt: an XML parser context
5603 *
5604 * parse and handle entity references in content, depending on the SAX
5605 * interface, this may end-up in a call to character() if this is a
5606 * CharRef, a predefined entity, if there is no reference() callback.
5607 * or if the parser was asked to switch to that mode.
5608 *
5609 * [67] Reference ::= EntityRef | CharRef
5610 */
5611void
5612xmlParseReference(xmlParserCtxtPtr ctxt) {
5613 xmlEntityPtr ent;
5614 xmlChar *val;
5615 if (RAW != '&') return;
5616
5617 if (NXT(1) == '#') {
5618 int i = 0;
5619 xmlChar out[10];
5620 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005621 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005622
5623 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5624 /*
5625 * So we are using non-UTF-8 buffers
5626 * Check that the char fit on 8bits, if not
5627 * generate a CharRef.
5628 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005629 if (value <= 0xFF) {
5630 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005631 out[1] = 0;
5632 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5633 (!ctxt->disableSAX))
5634 ctxt->sax->characters(ctxt->userData, out, 1);
5635 } else {
5636 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005637 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005638 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005639 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005640 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5641 (!ctxt->disableSAX))
5642 ctxt->sax->reference(ctxt->userData, out);
5643 }
5644 } else {
5645 /*
5646 * Just encode the value in UTF-8
5647 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005648 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005649 out[i] = 0;
5650 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5651 (!ctxt->disableSAX))
5652 ctxt->sax->characters(ctxt->userData, out, i);
5653 }
5654 } else {
5655 ent = xmlParseEntityRef(ctxt);
5656 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005657 if (!ctxt->wellFormed)
5658 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005659 if ((ent->name != NULL) &&
5660 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5661 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005662 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005663
5664
5665 /*
5666 * The first reference to the entity trigger a parsing phase
5667 * where the ent->children is filled with the result from
5668 * the parsing.
5669 */
5670 if (ent->children == NULL) {
5671 xmlChar *value;
5672 value = ent->content;
5673
5674 /*
5675 * Check that this entity is well formed
5676 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005677 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005678 (value[1] == 0) && (value[0] == '<') &&
5679 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5680 /*
5681 * DONE: get definite answer on this !!!
5682 * Lots of entity decls are used to declare a single
5683 * char
5684 * <!ENTITY lt "<">
5685 * Which seems to be valid since
5686 * 2.4: The ampersand character (&) and the left angle
5687 * bracket (<) may appear in their literal form only
5688 * when used ... They are also legal within the literal
5689 * entity value of an internal entity declaration;i
5690 * see "4.3.2 Well-Formed Parsed Entities".
5691 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5692 * Looking at the OASIS test suite and James Clark
5693 * tests, this is broken. However the XML REC uses
5694 * it. Is the XML REC not well-formed ????
5695 * This is a hack to avoid this problem
5696 *
5697 * ANSWER: since lt gt amp .. are already defined,
5698 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005699 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005700 * is lousy but acceptable.
5701 */
5702 list = xmlNewDocText(ctxt->myDoc, value);
5703 if (list != NULL) {
5704 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5705 (ent->children == NULL)) {
5706 ent->children = list;
5707 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005708 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005709 list->parent = (xmlNodePtr) ent;
5710 } else {
5711 xmlFreeNodeList(list);
5712 }
5713 } else if (list != NULL) {
5714 xmlFreeNodeList(list);
5715 }
5716 } else {
5717 /*
5718 * 4.3.2: An internal general parsed entity is well-formed
5719 * if its replacement text matches the production labeled
5720 * content.
5721 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005722
5723 void *user_data;
5724 /*
5725 * This is a bit hackish but this seems the best
5726 * way to make sure both SAX and DOM entity support
5727 * behaves okay.
5728 */
5729 if (ctxt->userData == ctxt)
5730 user_data = NULL;
5731 else
5732 user_data = ctxt->userData;
5733
Owen Taylor3473f882001-02-23 17:55:21 +00005734 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5735 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005736 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5737 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005738 ctxt->depth--;
5739 } else if (ent->etype ==
5740 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5741 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005742 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005743 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005744 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 ctxt->depth--;
5746 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005747 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005748 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5749 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005752 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005753 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005754 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005755 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5756 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005757 (ent->children == NULL)) {
5758 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005759 if (ctxt->replaceEntities) {
5760 /*
5761 * Prune it directly in the generated document
5762 * except for single text nodes.
5763 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005764 if (((list->type == XML_TEXT_NODE) &&
5765 (list->next == NULL)) ||
5766 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005767 list->parent = (xmlNodePtr) ent;
5768 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005769 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005770 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005771 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005772 while (list != NULL) {
5773 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005774 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005775 if (list->next == NULL)
5776 ent->last = list;
5777 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005778 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005779 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005780#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005781 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5782 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005783#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005784 }
5785 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005786 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005787 while (list != NULL) {
5788 list->parent = (xmlNodePtr) ent;
5789 if (list->next == NULL)
5790 ent->last = list;
5791 list = list->next;
5792 }
Owen Taylor3473f882001-02-23 17:55:21 +00005793 }
5794 } else {
5795 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005796 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005797 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005798 } else if ((ret != XML_ERR_OK) &&
5799 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005800 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005801 } else if (list != NULL) {
5802 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005803 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005804 }
5805 }
5806 }
5807 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5808 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5809 /*
5810 * Create a node.
5811 */
5812 ctxt->sax->reference(ctxt->userData, ent->name);
5813 return;
5814 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005815 /*
5816 * There is a problem on the handling of _private for entities
5817 * (bug 155816): Should we copy the content of the field from
5818 * the entity (possibly overwriting some value set by the user
5819 * when a copy is created), should we leave it alone, or should
5820 * we try to take care of different situations? The problem
5821 * is exacerbated by the usage of this field by the xmlReader.
5822 * To fix this bug, we look at _private on the created node
5823 * and, if it's NULL, we copy in whatever was in the entity.
5824 * If it's not NULL we leave it alone. This is somewhat of a
5825 * hack - maybe we should have further tests to determine
5826 * what to do.
5827 */
Owen Taylor3473f882001-02-23 17:55:21 +00005828 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5829 /*
5830 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005831 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005832 * In the first occurrence list contains the replacement.
5833 * progressive == 2 means we are operating on the Reader
5834 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005835 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005836 if (((list == NULL) && (ent->owner == 0)) ||
5837 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005838 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005839
5840 /*
5841 * when operating on a reader, the entities definitions
5842 * are always owning the entities subtree.
5843 if (ctxt->parseMode == XML_PARSE_READER)
5844 ent->owner = 1;
5845 */
5846
Daniel Veillard62f313b2001-07-04 19:49:14 +00005847 cur = ent->children;
5848 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005849 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005850 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005851 if (nw->_private == NULL)
5852 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005853 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005854 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005855 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005856 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005857 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005858 if (cur == ent->last) {
5859 /*
5860 * needed to detect some strange empty
5861 * node cases in the reader tests
5862 */
5863 if ((ctxt->parseMode == XML_PARSE_READER) &&
5864 (nw->type == XML_ELEMENT_NODE) &&
5865 (nw->children == NULL))
5866 nw->extra = 1;
5867
Daniel Veillard62f313b2001-07-04 19:49:14 +00005868 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005869 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005870 cur = cur->next;
5871 }
Daniel Veillard81273902003-09-30 00:43:48 +00005872#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005873 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005874 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005875#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005876 } else if (list == NULL) {
5877 xmlNodePtr nw = NULL, cur, next, last,
5878 firstChild = NULL;
5879 /*
5880 * Copy the entity child list and make it the new
5881 * entity child list. The goal is to make sure any
5882 * ID or REF referenced will be the one from the
5883 * document content and not the entity copy.
5884 */
5885 cur = ent->children;
5886 ent->children = NULL;
5887 last = ent->last;
5888 ent->last = NULL;
5889 while (cur != NULL) {
5890 next = cur->next;
5891 cur->next = NULL;
5892 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005893 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005894 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005895 if (nw->_private == NULL)
5896 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005897 if (firstChild == NULL){
5898 firstChild = cur;
5899 }
5900 xmlAddChild((xmlNodePtr) ent, nw);
5901 xmlAddChild(ctxt->node, cur);
5902 }
5903 if (cur == last)
5904 break;
5905 cur = next;
5906 }
5907 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005908#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005909 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5910 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005911#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005912 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005913 const xmlChar *nbktext;
5914
Daniel Veillard62f313b2001-07-04 19:49:14 +00005915 /*
5916 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005917 * node with a possible previous text one which
5918 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005919 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005920 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5921 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005922 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005923 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005924 if ((ent->last != ent->children) &&
5925 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005926 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005927 xmlAddChildList(ctxt->node, ent->children);
5928 }
5929
Owen Taylor3473f882001-02-23 17:55:21 +00005930 /*
5931 * This is to avoid a nasty side effect, see
5932 * characters() in SAX.c
5933 */
5934 ctxt->nodemem = 0;
5935 ctxt->nodelen = 0;
5936 return;
5937 } else {
5938 /*
5939 * Probably running in SAX mode
5940 */
5941 xmlParserInputPtr input;
5942
5943 input = xmlNewEntityInputStream(ctxt, ent);
5944 xmlPushInput(ctxt, input);
5945 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005946 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5947 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005948 xmlParseTextDecl(ctxt);
5949 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5950 /*
5951 * The XML REC instructs us to stop parsing right here
5952 */
5953 ctxt->instate = XML_PARSER_EOF;
5954 return;
5955 }
5956 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005957 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5958 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005959 }
5960 }
5961 return;
5962 }
5963 }
5964 } else {
5965 val = ent->content;
5966 if (val == NULL) return;
5967 /*
5968 * inline the entity.
5969 */
5970 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5971 (!ctxt->disableSAX))
5972 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5973 }
5974 }
5975}
5976
5977/**
5978 * xmlParseEntityRef:
5979 * @ctxt: an XML parser context
5980 *
5981 * parse ENTITY references declarations
5982 *
5983 * [68] EntityRef ::= '&' Name ';'
5984 *
5985 * [ WFC: Entity Declared ]
5986 * In a document without any DTD, a document with only an internal DTD
5987 * subset which contains no parameter entity references, or a document
5988 * with "standalone='yes'", the Name given in the entity reference
5989 * must match that in an entity declaration, except that well-formed
5990 * documents need not declare any of the following entities: amp, lt,
5991 * gt, apos, quot. The declaration of a parameter entity must precede
5992 * any reference to it. Similarly, the declaration of a general entity
5993 * must precede any reference to it which appears in a default value in an
5994 * attribute-list declaration. Note that if entities are declared in the
5995 * external subset or in external parameter entities, a non-validating
5996 * processor is not obligated to read and process their declarations;
5997 * for such documents, the rule that an entity must be declared is a
5998 * well-formedness constraint only if standalone='yes'.
5999 *
6000 * [ WFC: Parsed Entity ]
6001 * An entity reference must not contain the name of an unparsed entity
6002 *
6003 * Returns the xmlEntityPtr if found, or NULL otherwise.
6004 */
6005xmlEntityPtr
6006xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006007 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006008 xmlEntityPtr ent = NULL;
6009
6010 GROW;
6011
6012 if (RAW == '&') {
6013 NEXT;
6014 name = xmlParseName(ctxt);
6015 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6017 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006018 } else {
6019 if (RAW == ';') {
6020 NEXT;
6021 /*
6022 * Ask first SAX for entity resolution, otherwise try the
6023 * predefined set.
6024 */
6025 if (ctxt->sax != NULL) {
6026 if (ctxt->sax->getEntity != NULL)
6027 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006028 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006029 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006030 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6031 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006032 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006033 }
Owen Taylor3473f882001-02-23 17:55:21 +00006034 }
6035 /*
6036 * [ WFC: Entity Declared ]
6037 * In a document without any DTD, a document with only an
6038 * internal DTD subset which contains no parameter entity
6039 * references, or a document with "standalone='yes'", the
6040 * Name given in the entity reference must match that in an
6041 * entity declaration, except that well-formed documents
6042 * need not declare any of the following entities: amp, lt,
6043 * gt, apos, quot.
6044 * The declaration of a parameter entity must precede any
6045 * reference to it.
6046 * Similarly, the declaration of a general entity must
6047 * precede any reference to it which appears in a default
6048 * value in an attribute-list declaration. Note that if
6049 * entities are declared in the external subset or in
6050 * external parameter entities, a non-validating processor
6051 * is not obligated to read and process their declarations;
6052 * for such documents, the rule that an entity must be
6053 * declared is a well-formedness constraint only if
6054 * standalone='yes'.
6055 */
6056 if (ent == NULL) {
6057 if ((ctxt->standalone == 1) ||
6058 ((ctxt->hasExternalSubset == 0) &&
6059 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006060 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006061 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006062 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006063 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006064 "Entity '%s' not defined\n", name);
6065 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006066 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006067 }
6068
6069 /*
6070 * [ WFC: Parsed Entity ]
6071 * An entity reference must not contain the name of an
6072 * unparsed entity
6073 */
6074 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006075 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006076 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006077 }
6078
6079 /*
6080 * [ WFC: No External Entity References ]
6081 * Attribute values cannot contain direct or indirect
6082 * entity references to external entities.
6083 */
6084 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6085 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006086 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6087 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006088 }
6089 /*
6090 * [ WFC: No < in Attribute Values ]
6091 * The replacement text of any entity referred to directly or
6092 * indirectly in an attribute value (other than "&lt;") must
6093 * not contain a <.
6094 */
6095 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6096 (ent != NULL) &&
6097 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6098 (ent->content != NULL) &&
6099 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006100 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006101 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006102 }
6103
6104 /*
6105 * Internal check, no parameter entities here ...
6106 */
6107 else {
6108 switch (ent->etype) {
6109 case XML_INTERNAL_PARAMETER_ENTITY:
6110 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006111 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6112 "Attempt to reference the parameter entity '%s'\n",
6113 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006114 break;
6115 default:
6116 break;
6117 }
6118 }
6119
6120 /*
6121 * [ WFC: No Recursion ]
6122 * A parsed entity must not contain a recursive reference
6123 * to itself, either directly or indirectly.
6124 * Done somewhere else
6125 */
6126
6127 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006128 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006129 }
Owen Taylor3473f882001-02-23 17:55:21 +00006130 }
6131 }
6132 return(ent);
6133}
6134
6135/**
6136 * xmlParseStringEntityRef:
6137 * @ctxt: an XML parser context
6138 * @str: a pointer to an index in the string
6139 *
6140 * parse ENTITY references declarations, but this version parses it from
6141 * a string value.
6142 *
6143 * [68] EntityRef ::= '&' Name ';'
6144 *
6145 * [ WFC: Entity Declared ]
6146 * In a document without any DTD, a document with only an internal DTD
6147 * subset which contains no parameter entity references, or a document
6148 * with "standalone='yes'", the Name given in the entity reference
6149 * must match that in an entity declaration, except that well-formed
6150 * documents need not declare any of the following entities: amp, lt,
6151 * gt, apos, quot. The declaration of a parameter entity must precede
6152 * any reference to it. Similarly, the declaration of a general entity
6153 * must precede any reference to it which appears in a default value in an
6154 * attribute-list declaration. Note that if entities are declared in the
6155 * external subset or in external parameter entities, a non-validating
6156 * processor is not obligated to read and process their declarations;
6157 * for such documents, the rule that an entity must be declared is a
6158 * well-formedness constraint only if standalone='yes'.
6159 *
6160 * [ WFC: Parsed Entity ]
6161 * An entity reference must not contain the name of an unparsed entity
6162 *
6163 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6164 * is updated to the current location in the string.
6165 */
6166xmlEntityPtr
6167xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6168 xmlChar *name;
6169 const xmlChar *ptr;
6170 xmlChar cur;
6171 xmlEntityPtr ent = NULL;
6172
6173 if ((str == NULL) || (*str == NULL))
6174 return(NULL);
6175 ptr = *str;
6176 cur = *ptr;
6177 if (cur == '&') {
6178 ptr++;
6179 cur = *ptr;
6180 name = xmlParseStringName(ctxt, &ptr);
6181 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006182 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6183 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006184 } else {
6185 if (*ptr == ';') {
6186 ptr++;
6187 /*
6188 * Ask first SAX for entity resolution, otherwise try the
6189 * predefined set.
6190 */
6191 if (ctxt->sax != NULL) {
6192 if (ctxt->sax->getEntity != NULL)
6193 ent = ctxt->sax->getEntity(ctxt->userData, name);
6194 if (ent == NULL)
6195 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006196 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006197 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006198 }
Owen Taylor3473f882001-02-23 17:55:21 +00006199 }
6200 /*
6201 * [ WFC: Entity Declared ]
6202 * In a document without any DTD, a document with only an
6203 * internal DTD subset which contains no parameter entity
6204 * references, or a document with "standalone='yes'", the
6205 * Name given in the entity reference must match that in an
6206 * entity declaration, except that well-formed documents
6207 * need not declare any of the following entities: amp, lt,
6208 * gt, apos, quot.
6209 * The declaration of a parameter entity must precede any
6210 * reference to it.
6211 * Similarly, the declaration of a general entity must
6212 * precede any reference to it which appears in a default
6213 * value in an attribute-list declaration. Note that if
6214 * entities are declared in the external subset or in
6215 * external parameter entities, a non-validating processor
6216 * is not obligated to read and process their declarations;
6217 * for such documents, the rule that an entity must be
6218 * declared is a well-formedness constraint only if
6219 * standalone='yes'.
6220 */
6221 if (ent == NULL) {
6222 if ((ctxt->standalone == 1) ||
6223 ((ctxt->hasExternalSubset == 0) &&
6224 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006225 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006226 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006227 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006228 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006229 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006230 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006231 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006232 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006233 }
6234
6235 /*
6236 * [ WFC: Parsed Entity ]
6237 * An entity reference must not contain the name of an
6238 * unparsed entity
6239 */
6240 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006241 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006242 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006243 }
6244
6245 /*
6246 * [ WFC: No External Entity References ]
6247 * Attribute values cannot contain direct or indirect
6248 * entity references to external entities.
6249 */
6250 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6251 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006252 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006253 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006254 }
6255 /*
6256 * [ WFC: No < in Attribute Values ]
6257 * The replacement text of any entity referred to directly or
6258 * indirectly in an attribute value (other than "&lt;") must
6259 * not contain a <.
6260 */
6261 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6262 (ent != NULL) &&
6263 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6264 (ent->content != NULL) &&
6265 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006266 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6267 "'<' in entity '%s' is not allowed in attributes values\n",
6268 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270
6271 /*
6272 * Internal check, no parameter entities here ...
6273 */
6274 else {
6275 switch (ent->etype) {
6276 case XML_INTERNAL_PARAMETER_ENTITY:
6277 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006278 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6279 "Attempt to reference the parameter entity '%s'\n",
6280 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006281 break;
6282 default:
6283 break;
6284 }
6285 }
6286
6287 /*
6288 * [ WFC: No Recursion ]
6289 * A parsed entity must not contain a recursive reference
6290 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006291 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006292 */
6293
6294 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006295 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006296 }
6297 xmlFree(name);
6298 }
6299 }
6300 *str = ptr;
6301 return(ent);
6302}
6303
6304/**
6305 * xmlParsePEReference:
6306 * @ctxt: an XML parser context
6307 *
6308 * parse PEReference declarations
6309 * The entity content is handled directly by pushing it's content as
6310 * a new input stream.
6311 *
6312 * [69] PEReference ::= '%' Name ';'
6313 *
6314 * [ WFC: No Recursion ]
6315 * A parsed entity must not contain a recursive
6316 * reference to itself, either directly or indirectly.
6317 *
6318 * [ WFC: Entity Declared ]
6319 * In a document without any DTD, a document with only an internal DTD
6320 * subset which contains no parameter entity references, or a document
6321 * with "standalone='yes'", ... ... The declaration of a parameter
6322 * entity must precede any reference to it...
6323 *
6324 * [ VC: Entity Declared ]
6325 * In a document with an external subset or external parameter entities
6326 * with "standalone='no'", ... ... The declaration of a parameter entity
6327 * must precede any reference to it...
6328 *
6329 * [ WFC: In DTD ]
6330 * Parameter-entity references may only appear in the DTD.
6331 * NOTE: misleading but this is handled.
6332 */
6333void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006334xmlParsePEReference(xmlParserCtxtPtr ctxt)
6335{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006336 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 xmlEntityPtr entity = NULL;
6338 xmlParserInputPtr input;
6339
6340 if (RAW == '%') {
6341 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006342 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006343 if (name == NULL) {
6344 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6345 "xmlParsePEReference: no name\n");
6346 } else {
6347 if (RAW == ';') {
6348 NEXT;
6349 if ((ctxt->sax != NULL) &&
6350 (ctxt->sax->getParameterEntity != NULL))
6351 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6352 name);
6353 if (entity == NULL) {
6354 /*
6355 * [ WFC: Entity Declared ]
6356 * In a document without any DTD, a document with only an
6357 * internal DTD subset which contains no parameter entity
6358 * references, or a document with "standalone='yes'", ...
6359 * ... The declaration of a parameter entity must precede
6360 * any reference to it...
6361 */
6362 if ((ctxt->standalone == 1) ||
6363 ((ctxt->hasExternalSubset == 0) &&
6364 (ctxt->hasPErefs == 0))) {
6365 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6366 "PEReference: %%%s; not found\n",
6367 name);
6368 } else {
6369 /*
6370 * [ VC: Entity Declared ]
6371 * In a document with an external subset or external
6372 * parameter entities with "standalone='no'", ...
6373 * ... The declaration of a parameter entity must
6374 * precede any reference to it...
6375 */
6376 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6377 "PEReference: %%%s; not found\n",
6378 name, NULL);
6379 ctxt->valid = 0;
6380 }
6381 } else {
6382 /*
6383 * Internal checking in case the entity quest barfed
6384 */
6385 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6386 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6387 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6388 "Internal: %%%s; is not a parameter entity\n",
6389 name, NULL);
6390 } else if (ctxt->input->free != deallocblankswrapper) {
6391 input =
6392 xmlNewBlanksWrapperInputStream(ctxt, entity);
6393 xmlPushInput(ctxt, input);
6394 } else {
6395 /*
6396 * TODO !!!
6397 * handle the extra spaces added before and after
6398 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6399 */
6400 input = xmlNewEntityInputStream(ctxt, entity);
6401 xmlPushInput(ctxt, input);
6402 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006403 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006404 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006405 xmlParseTextDecl(ctxt);
6406 if (ctxt->errNo ==
6407 XML_ERR_UNSUPPORTED_ENCODING) {
6408 /*
6409 * The XML REC instructs us to stop parsing
6410 * right here
6411 */
6412 ctxt->instate = XML_PARSER_EOF;
6413 return;
6414 }
6415 }
6416 }
6417 }
6418 ctxt->hasPErefs = 1;
6419 } else {
6420 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6421 }
6422 }
Owen Taylor3473f882001-02-23 17:55:21 +00006423 }
6424}
6425
6426/**
6427 * xmlParseStringPEReference:
6428 * @ctxt: an XML parser context
6429 * @str: a pointer to an index in the string
6430 *
6431 * parse PEReference declarations
6432 *
6433 * [69] PEReference ::= '%' Name ';'
6434 *
6435 * [ WFC: No Recursion ]
6436 * A parsed entity must not contain a recursive
6437 * reference to itself, either directly or indirectly.
6438 *
6439 * [ WFC: Entity Declared ]
6440 * In a document without any DTD, a document with only an internal DTD
6441 * subset which contains no parameter entity references, or a document
6442 * with "standalone='yes'", ... ... The declaration of a parameter
6443 * entity must precede any reference to it...
6444 *
6445 * [ VC: Entity Declared ]
6446 * In a document with an external subset or external parameter entities
6447 * with "standalone='no'", ... ... The declaration of a parameter entity
6448 * must precede any reference to it...
6449 *
6450 * [ WFC: In DTD ]
6451 * Parameter-entity references may only appear in the DTD.
6452 * NOTE: misleading but this is handled.
6453 *
6454 * Returns the string of the entity content.
6455 * str is updated to the current value of the index
6456 */
6457xmlEntityPtr
6458xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6459 const xmlChar *ptr;
6460 xmlChar cur;
6461 xmlChar *name;
6462 xmlEntityPtr entity = NULL;
6463
6464 if ((str == NULL) || (*str == NULL)) return(NULL);
6465 ptr = *str;
6466 cur = *ptr;
6467 if (cur == '%') {
6468 ptr++;
6469 cur = *ptr;
6470 name = xmlParseStringName(ctxt, &ptr);
6471 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006472 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6473 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006474 } else {
6475 cur = *ptr;
6476 if (cur == ';') {
6477 ptr++;
6478 cur = *ptr;
6479 if ((ctxt->sax != NULL) &&
6480 (ctxt->sax->getParameterEntity != NULL))
6481 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6482 name);
6483 if (entity == NULL) {
6484 /*
6485 * [ WFC: Entity Declared ]
6486 * In a document without any DTD, a document with only an
6487 * internal DTD subset which contains no parameter entity
6488 * references, or a document with "standalone='yes'", ...
6489 * ... The declaration of a parameter entity must precede
6490 * any reference to it...
6491 */
6492 if ((ctxt->standalone == 1) ||
6493 ((ctxt->hasExternalSubset == 0) &&
6494 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006495 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006496 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006497 } else {
6498 /*
6499 * [ VC: Entity Declared ]
6500 * In a document with an external subset or external
6501 * parameter entities with "standalone='no'", ...
6502 * ... The declaration of a parameter entity must
6503 * precede any reference to it...
6504 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006505 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6506 "PEReference: %%%s; not found\n",
6507 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006508 ctxt->valid = 0;
6509 }
6510 } else {
6511 /*
6512 * Internal checking in case the entity quest barfed
6513 */
6514 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6515 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006516 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6517 "%%%s; is not a parameter entity\n",
6518 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006519 }
6520 }
6521 ctxt->hasPErefs = 1;
6522 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006523 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006524 }
6525 xmlFree(name);
6526 }
6527 }
6528 *str = ptr;
6529 return(entity);
6530}
6531
6532/**
6533 * xmlParseDocTypeDecl:
6534 * @ctxt: an XML parser context
6535 *
6536 * parse a DOCTYPE declaration
6537 *
6538 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6539 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6540 *
6541 * [ VC: Root Element Type ]
6542 * The Name in the document type declaration must match the element
6543 * type of the root element.
6544 */
6545
6546void
6547xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006548 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006549 xmlChar *ExternalID = NULL;
6550 xmlChar *URI = NULL;
6551
6552 /*
6553 * We know that '<!DOCTYPE' has been detected.
6554 */
6555 SKIP(9);
6556
6557 SKIP_BLANKS;
6558
6559 /*
6560 * Parse the DOCTYPE name.
6561 */
6562 name = xmlParseName(ctxt);
6563 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006564 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6565 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006566 }
6567 ctxt->intSubName = name;
6568
6569 SKIP_BLANKS;
6570
6571 /*
6572 * Check for SystemID and ExternalID
6573 */
6574 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6575
6576 if ((URI != NULL) || (ExternalID != NULL)) {
6577 ctxt->hasExternalSubset = 1;
6578 }
6579 ctxt->extSubURI = URI;
6580 ctxt->extSubSystem = ExternalID;
6581
6582 SKIP_BLANKS;
6583
6584 /*
6585 * Create and update the internal subset.
6586 */
6587 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6588 (!ctxt->disableSAX))
6589 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6590
6591 /*
6592 * Is there any internal subset declarations ?
6593 * they are handled separately in xmlParseInternalSubset()
6594 */
6595 if (RAW == '[')
6596 return;
6597
6598 /*
6599 * We should be at the end of the DOCTYPE declaration.
6600 */
6601 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006602 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006603 }
6604 NEXT;
6605}
6606
6607/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006608 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006609 * @ctxt: an XML parser context
6610 *
6611 * parse the internal subset declaration
6612 *
6613 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6614 */
6615
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006616static void
Owen Taylor3473f882001-02-23 17:55:21 +00006617xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6618 /*
6619 * Is there any DTD definition ?
6620 */
6621 if (RAW == '[') {
6622 ctxt->instate = XML_PARSER_DTD;
6623 NEXT;
6624 /*
6625 * Parse the succession of Markup declarations and
6626 * PEReferences.
6627 * Subsequence (markupdecl | PEReference | S)*
6628 */
6629 while (RAW != ']') {
6630 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006631 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006632
6633 SKIP_BLANKS;
6634 xmlParseMarkupDecl(ctxt);
6635 xmlParsePEReference(ctxt);
6636
6637 /*
6638 * Pop-up of finished entities.
6639 */
6640 while ((RAW == 0) && (ctxt->inputNr > 1))
6641 xmlPopInput(ctxt);
6642
6643 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006644 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006645 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006646 break;
6647 }
6648 }
6649 if (RAW == ']') {
6650 NEXT;
6651 SKIP_BLANKS;
6652 }
6653 }
6654
6655 /*
6656 * We should be at the end of the DOCTYPE declaration.
6657 */
6658 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006659 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 }
6661 NEXT;
6662}
6663
Daniel Veillard81273902003-09-30 00:43:48 +00006664#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006665/**
6666 * xmlParseAttribute:
6667 * @ctxt: an XML parser context
6668 * @value: a xmlChar ** used to store the value of the attribute
6669 *
6670 * parse an attribute
6671 *
6672 * [41] Attribute ::= Name Eq AttValue
6673 *
6674 * [ WFC: No External Entity References ]
6675 * Attribute values cannot contain direct or indirect entity references
6676 * to external entities.
6677 *
6678 * [ WFC: No < in Attribute Values ]
6679 * The replacement text of any entity referred to directly or indirectly in
6680 * an attribute value (other than "&lt;") must not contain a <.
6681 *
6682 * [ VC: Attribute Value Type ]
6683 * The attribute must have been declared; the value must be of the type
6684 * declared for it.
6685 *
6686 * [25] Eq ::= S? '=' S?
6687 *
6688 * With namespace:
6689 *
6690 * [NS 11] Attribute ::= QName Eq AttValue
6691 *
6692 * Also the case QName == xmlns:??? is handled independently as a namespace
6693 * definition.
6694 *
6695 * Returns the attribute name, and the value in *value.
6696 */
6697
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006698const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006699xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006700 const xmlChar *name;
6701 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006702
6703 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006704 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006705 name = xmlParseName(ctxt);
6706 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006707 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006708 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006709 return(NULL);
6710 }
6711
6712 /*
6713 * read the value
6714 */
6715 SKIP_BLANKS;
6716 if (RAW == '=') {
6717 NEXT;
6718 SKIP_BLANKS;
6719 val = xmlParseAttValue(ctxt);
6720 ctxt->instate = XML_PARSER_CONTENT;
6721 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006722 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006723 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006724 return(NULL);
6725 }
6726
6727 /*
6728 * Check that xml:lang conforms to the specification
6729 * No more registered as an error, just generate a warning now
6730 * since this was deprecated in XML second edition
6731 */
6732 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6733 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006734 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6735 "Malformed value for xml:lang : %s\n",
6736 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006737 }
6738 }
6739
6740 /*
6741 * Check that xml:space conforms to the specification
6742 */
6743 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6744 if (xmlStrEqual(val, BAD_CAST "default"))
6745 *(ctxt->space) = 0;
6746 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6747 *(ctxt->space) = 1;
6748 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006749 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006750"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006751 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006752 }
6753 }
6754
6755 *value = val;
6756 return(name);
6757}
6758
6759/**
6760 * xmlParseStartTag:
6761 * @ctxt: an XML parser context
6762 *
6763 * parse a start of tag either for rule element or
6764 * EmptyElement. In both case we don't parse the tag closing chars.
6765 *
6766 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6767 *
6768 * [ WFC: Unique Att Spec ]
6769 * No attribute name may appear more than once in the same start-tag or
6770 * empty-element tag.
6771 *
6772 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6773 *
6774 * [ WFC: Unique Att Spec ]
6775 * No attribute name may appear more than once in the same start-tag or
6776 * empty-element tag.
6777 *
6778 * With namespace:
6779 *
6780 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6781 *
6782 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6783 *
6784 * Returns the element name parsed
6785 */
6786
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006787const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006788xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006789 const xmlChar *name;
6790 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006791 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006792 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006793 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006794 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006795 int i;
6796
6797 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006798 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006799
6800 name = xmlParseName(ctxt);
6801 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006802 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006803 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006804 return(NULL);
6805 }
6806
6807 /*
6808 * Now parse the attributes, it ends up with the ending
6809 *
6810 * (S Attribute)* S?
6811 */
6812 SKIP_BLANKS;
6813 GROW;
6814
Daniel Veillard21a0f912001-02-25 19:54:14 +00006815 while ((RAW != '>') &&
6816 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006817 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006818 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006819 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006820
6821 attname = xmlParseAttribute(ctxt, &attvalue);
6822 if ((attname != NULL) && (attvalue != NULL)) {
6823 /*
6824 * [ WFC: Unique Att Spec ]
6825 * No attribute name may appear more than once in the same
6826 * start-tag or empty-element tag.
6827 */
6828 for (i = 0; i < nbatts;i += 2) {
6829 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006830 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 xmlFree(attvalue);
6832 goto failed;
6833 }
6834 }
Owen Taylor3473f882001-02-23 17:55:21 +00006835 /*
6836 * Add the pair to atts
6837 */
6838 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006839 maxatts = 22; /* allow for 10 attrs by default */
6840 atts = (const xmlChar **)
6841 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006842 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006843 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006844 if (attvalue != NULL)
6845 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006846 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006847 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006848 ctxt->atts = atts;
6849 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006850 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006851 const xmlChar **n;
6852
Owen Taylor3473f882001-02-23 17:55:21 +00006853 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006854 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006855 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006856 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006857 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006858 if (attvalue != NULL)
6859 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006860 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006861 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006862 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006863 ctxt->atts = atts;
6864 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006865 }
6866 atts[nbatts++] = attname;
6867 atts[nbatts++] = attvalue;
6868 atts[nbatts] = NULL;
6869 atts[nbatts + 1] = NULL;
6870 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006871 if (attvalue != NULL)
6872 xmlFree(attvalue);
6873 }
6874
6875failed:
6876
Daniel Veillard3772de32002-12-17 10:31:45 +00006877 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006878 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6879 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006880 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006881 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6882 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006883 }
6884 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006885 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6886 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006887 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6888 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006889 break;
6890 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006891 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006892 GROW;
6893 }
6894
6895 /*
6896 * SAX: Start of Element !
6897 */
6898 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006899 (!ctxt->disableSAX)) {
6900 if (nbatts > 0)
6901 ctxt->sax->startElement(ctxt->userData, name, atts);
6902 else
6903 ctxt->sax->startElement(ctxt->userData, name, NULL);
6904 }
Owen Taylor3473f882001-02-23 17:55:21 +00006905
6906 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006907 /* Free only the content strings */
6908 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006909 if (atts[i] != NULL)
6910 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006911 }
6912 return(name);
6913}
6914
6915/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006916 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006917 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006918 * @line: line of the start tag
6919 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006920 *
6921 * parse an end of tag
6922 *
6923 * [42] ETag ::= '</' Name S? '>'
6924 *
6925 * With namespace
6926 *
6927 * [NS 9] ETag ::= '</' QName S? '>'
6928 */
6929
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006930static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006931xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006932 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006933
6934 GROW;
6935 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006936 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006937 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006938 return;
6939 }
6940 SKIP(2);
6941
Daniel Veillard46de64e2002-05-29 08:21:33 +00006942 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006943
6944 /*
6945 * We should definitely be at the ending "S? '>'" part
6946 */
6947 GROW;
6948 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006949 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006950 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006951 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006952 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006953
6954 /*
6955 * [ WFC: Element Type Match ]
6956 * The Name in an element's end-tag must match the element type in the
6957 * start-tag.
6958 *
6959 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006960 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006961 if (name == NULL) name = BAD_CAST "unparseable";
6962 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006963 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006964 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006965 }
6966
6967 /*
6968 * SAX: End of Tag
6969 */
6970 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6971 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006972 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006973
Daniel Veillarde57ec792003-09-10 10:50:59 +00006974 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006975 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006976 return;
6977}
6978
6979/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006980 * xmlParseEndTag:
6981 * @ctxt: an XML parser context
6982 *
6983 * parse an end of tag
6984 *
6985 * [42] ETag ::= '</' Name S? '>'
6986 *
6987 * With namespace
6988 *
6989 * [NS 9] ETag ::= '</' QName S? '>'
6990 */
6991
6992void
6993xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006994 xmlParseEndTag1(ctxt, 0);
6995}
Daniel Veillard81273902003-09-30 00:43:48 +00006996#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006997
6998/************************************************************************
6999 * *
7000 * SAX 2 specific operations *
7001 * *
7002 ************************************************************************/
7003
7004static const xmlChar *
7005xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7006 int len = 0, l;
7007 int c;
7008 int count = 0;
7009
7010 /*
7011 * Handler for more complex cases
7012 */
7013 GROW;
7014 c = CUR_CHAR(l);
7015 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007016 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007017 return(NULL);
7018 }
7019
7020 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007021 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007022 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007023 (IS_COMBINING(c)) ||
7024 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007025 if (count++ > 100) {
7026 count = 0;
7027 GROW;
7028 }
7029 len += l;
7030 NEXTL(l);
7031 c = CUR_CHAR(l);
7032 }
7033 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7034}
7035
7036/*
7037 * xmlGetNamespace:
7038 * @ctxt: an XML parser context
7039 * @prefix: the prefix to lookup
7040 *
7041 * Lookup the namespace name for the @prefix (which ca be NULL)
7042 * The prefix must come from the @ctxt->dict dictionnary
7043 *
7044 * Returns the namespace name or NULL if not bound
7045 */
7046static const xmlChar *
7047xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7048 int i;
7049
Daniel Veillarde57ec792003-09-10 10:50:59 +00007050 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007051 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007052 if (ctxt->nsTab[i] == prefix) {
7053 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7054 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007055 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007056 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007057 return(NULL);
7058}
7059
7060/**
7061 * xmlParseNCName:
7062 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007063 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007064 *
7065 * parse an XML name.
7066 *
7067 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7068 * CombiningChar | Extender
7069 *
7070 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7071 *
7072 * Returns the Name parsed or NULL
7073 */
7074
7075static const xmlChar *
7076xmlParseNCName(xmlParserCtxtPtr ctxt) {
7077 const xmlChar *in;
7078 const xmlChar *ret;
7079 int count = 0;
7080
7081 /*
7082 * Accelerator for simple ASCII names
7083 */
7084 in = ctxt->input->cur;
7085 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7086 ((*in >= 0x41) && (*in <= 0x5A)) ||
7087 (*in == '_')) {
7088 in++;
7089 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7090 ((*in >= 0x41) && (*in <= 0x5A)) ||
7091 ((*in >= 0x30) && (*in <= 0x39)) ||
7092 (*in == '_') || (*in == '-') ||
7093 (*in == '.'))
7094 in++;
7095 if ((*in > 0) && (*in < 0x80)) {
7096 count = in - ctxt->input->cur;
7097 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7098 ctxt->input->cur = in;
7099 ctxt->nbChars += count;
7100 ctxt->input->col += count;
7101 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007102 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007103 }
7104 return(ret);
7105 }
7106 }
7107 return(xmlParseNCNameComplex(ctxt));
7108}
7109
7110/**
7111 * xmlParseQName:
7112 * @ctxt: an XML parser context
7113 * @prefix: pointer to store the prefix part
7114 *
7115 * parse an XML Namespace QName
7116 *
7117 * [6] QName ::= (Prefix ':')? LocalPart
7118 * [7] Prefix ::= NCName
7119 * [8] LocalPart ::= NCName
7120 *
7121 * Returns the Name parsed or NULL
7122 */
7123
7124static const xmlChar *
7125xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7126 const xmlChar *l, *p;
7127
7128 GROW;
7129
7130 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007131 if (l == NULL) {
7132 if (CUR == ':') {
7133 l = xmlParseName(ctxt);
7134 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007135 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7136 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007137 *prefix = NULL;
7138 return(l);
7139 }
7140 }
7141 return(NULL);
7142 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007143 if (CUR == ':') {
7144 NEXT;
7145 p = l;
7146 l = xmlParseNCName(ctxt);
7147 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007148 xmlChar *tmp;
7149
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007150 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7151 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007152 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7153 p = xmlDictLookup(ctxt->dict, tmp, -1);
7154 if (tmp != NULL) xmlFree(tmp);
7155 *prefix = NULL;
7156 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007157 }
7158 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007159 xmlChar *tmp;
7160
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007161 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7162 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007163 NEXT;
7164 tmp = (xmlChar *) xmlParseName(ctxt);
7165 if (tmp != NULL) {
7166 tmp = xmlBuildQName(tmp, l, NULL, 0);
7167 l = xmlDictLookup(ctxt->dict, tmp, -1);
7168 if (tmp != NULL) xmlFree(tmp);
7169 *prefix = p;
7170 return(l);
7171 }
7172 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7173 l = xmlDictLookup(ctxt->dict, tmp, -1);
7174 if (tmp != NULL) xmlFree(tmp);
7175 *prefix = p;
7176 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007177 }
7178 *prefix = p;
7179 } else
7180 *prefix = NULL;
7181 return(l);
7182}
7183
7184/**
7185 * xmlParseQNameAndCompare:
7186 * @ctxt: an XML parser context
7187 * @name: the localname
7188 * @prefix: the prefix, if any.
7189 *
7190 * parse an XML name and compares for match
7191 * (specialized for endtag parsing)
7192 *
7193 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7194 * and the name for mismatch
7195 */
7196
7197static const xmlChar *
7198xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7199 xmlChar const *prefix) {
7200 const xmlChar *cmp = name;
7201 const xmlChar *in;
7202 const xmlChar *ret;
7203 const xmlChar *prefix2;
7204
7205 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7206
7207 GROW;
7208 in = ctxt->input->cur;
7209
7210 cmp = prefix;
7211 while (*in != 0 && *in == *cmp) {
7212 ++in;
7213 ++cmp;
7214 }
7215 if ((*cmp == 0) && (*in == ':')) {
7216 in++;
7217 cmp = name;
7218 while (*in != 0 && *in == *cmp) {
7219 ++in;
7220 ++cmp;
7221 }
William M. Brack76e95df2003-10-18 16:20:14 +00007222 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007223 /* success */
7224 ctxt->input->cur = in;
7225 return((const xmlChar*) 1);
7226 }
7227 }
7228 /*
7229 * all strings coms from the dictionary, equality can be done directly
7230 */
7231 ret = xmlParseQName (ctxt, &prefix2);
7232 if ((ret == name) && (prefix == prefix2))
7233 return((const xmlChar*) 1);
7234 return ret;
7235}
7236
7237/**
7238 * xmlParseAttValueInternal:
7239 * @ctxt: an XML parser context
7240 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007241 * @alloc: whether the attribute was reallocated as a new string
7242 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007243 *
7244 * parse a value for an attribute.
7245 * NOTE: if no normalization is needed, the routine will return pointers
7246 * directly from the data buffer.
7247 *
7248 * 3.3.3 Attribute-Value Normalization:
7249 * Before the value of an attribute is passed to the application or
7250 * checked for validity, the XML processor must normalize it as follows:
7251 * - a character reference is processed by appending the referenced
7252 * character to the attribute value
7253 * - an entity reference is processed by recursively processing the
7254 * replacement text of the entity
7255 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7256 * appending #x20 to the normalized value, except that only a single
7257 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7258 * parsed entity or the literal entity value of an internal parsed entity
7259 * - other characters are processed by appending them to the normalized value
7260 * If the declared value is not CDATA, then the XML processor must further
7261 * process the normalized attribute value by discarding any leading and
7262 * trailing space (#x20) characters, and by replacing sequences of space
7263 * (#x20) characters by a single space (#x20) character.
7264 * All attributes for which no declaration has been read should be treated
7265 * by a non-validating parser as if declared CDATA.
7266 *
7267 * Returns the AttValue parsed or NULL. The value has to be freed by the
7268 * caller if it was copied, this can be detected by val[*len] == 0.
7269 */
7270
7271static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007272xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7273 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007274{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007275 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007276 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007277 xmlChar *ret = NULL;
7278
7279 GROW;
7280 in = (xmlChar *) CUR_PTR;
7281 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007282 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007283 return (NULL);
7284 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007285 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007286
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007287 /*
7288 * try to handle in this routine the most common case where no
7289 * allocation of a new string is required and where content is
7290 * pure ASCII.
7291 */
7292 limit = *in++;
7293 end = ctxt->input->end;
7294 start = in;
7295 if (in >= end) {
7296 const xmlChar *oldbase = ctxt->input->base;
7297 GROW;
7298 if (oldbase != ctxt->input->base) {
7299 long delta = ctxt->input->base - oldbase;
7300 start = start + delta;
7301 in = in + delta;
7302 }
7303 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007304 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007305 if (normalize) {
7306 /*
7307 * Skip any leading spaces
7308 */
7309 while ((in < end) && (*in != limit) &&
7310 ((*in == 0x20) || (*in == 0x9) ||
7311 (*in == 0xA) || (*in == 0xD))) {
7312 in++;
7313 start = in;
7314 if (in >= end) {
7315 const xmlChar *oldbase = ctxt->input->base;
7316 GROW;
7317 if (oldbase != ctxt->input->base) {
7318 long delta = ctxt->input->base - oldbase;
7319 start = start + delta;
7320 in = in + delta;
7321 }
7322 end = ctxt->input->end;
7323 }
7324 }
7325 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7326 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7327 if ((*in++ == 0x20) && (*in == 0x20)) break;
7328 if (in >= end) {
7329 const xmlChar *oldbase = ctxt->input->base;
7330 GROW;
7331 if (oldbase != ctxt->input->base) {
7332 long delta = ctxt->input->base - oldbase;
7333 start = start + delta;
7334 in = in + delta;
7335 }
7336 end = ctxt->input->end;
7337 }
7338 }
7339 last = in;
7340 /*
7341 * skip the trailing blanks
7342 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007343 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007344 while ((in < end) && (*in != limit) &&
7345 ((*in == 0x20) || (*in == 0x9) ||
7346 (*in == 0xA) || (*in == 0xD))) {
7347 in++;
7348 if (in >= end) {
7349 const xmlChar *oldbase = ctxt->input->base;
7350 GROW;
7351 if (oldbase != ctxt->input->base) {
7352 long delta = ctxt->input->base - oldbase;
7353 start = start + delta;
7354 in = in + delta;
7355 last = last + delta;
7356 }
7357 end = ctxt->input->end;
7358 }
7359 }
7360 if (*in != limit) goto need_complex;
7361 } else {
7362 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7363 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7364 in++;
7365 if (in >= end) {
7366 const xmlChar *oldbase = ctxt->input->base;
7367 GROW;
7368 if (oldbase != ctxt->input->base) {
7369 long delta = ctxt->input->base - oldbase;
7370 start = start + delta;
7371 in = in + delta;
7372 }
7373 end = ctxt->input->end;
7374 }
7375 }
7376 last = in;
7377 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007379 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007380 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007381 *len = last - start;
7382 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007383 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007384 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007385 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007386 }
7387 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007388 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007389 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007390need_complex:
7391 if (alloc) *alloc = 1;
7392 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007393}
7394
7395/**
7396 * xmlParseAttribute2:
7397 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007398 * @pref: the element prefix
7399 * @elem: the element name
7400 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007401 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007402 * @len: an int * to save the length of the attribute
7403 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007404 *
7405 * parse an attribute in the new SAX2 framework.
7406 *
7407 * Returns the attribute name, and the value in *value, .
7408 */
7409
7410static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007411xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7412 const xmlChar *pref, const xmlChar *elem,
7413 const xmlChar **prefix, xmlChar **value,
7414 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007415 const xmlChar *name;
7416 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007417 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007418
7419 *value = NULL;
7420 GROW;
7421 name = xmlParseQName(ctxt, prefix);
7422 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007423 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7424 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007425 return(NULL);
7426 }
7427
7428 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007429 * get the type if needed
7430 */
7431 if (ctxt->attsSpecial != NULL) {
7432 int type;
7433
7434 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7435 pref, elem, *prefix, name);
7436 if (type != 0) normalize = 1;
7437 }
7438
7439 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007440 * read the value
7441 */
7442 SKIP_BLANKS;
7443 if (RAW == '=') {
7444 NEXT;
7445 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007446 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007447 ctxt->instate = XML_PARSER_CONTENT;
7448 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007449 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007450 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007451 return(NULL);
7452 }
7453
7454 /*
7455 * Check that xml:lang conforms to the specification
7456 * No more registered as an error, just generate a warning now
7457 * since this was deprecated in XML second edition
7458 */
7459 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7460 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007461 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7462 "Malformed value for xml:lang : %s\n",
7463 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007464 }
7465 }
7466
7467 /*
7468 * Check that xml:space conforms to the specification
7469 */
7470 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7471 if (xmlStrEqual(val, BAD_CAST "default"))
7472 *(ctxt->space) = 0;
7473 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7474 *(ctxt->space) = 1;
7475 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007476 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007477"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7478 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007479 }
7480 }
7481
7482 *value = val;
7483 return(name);
7484}
7485
7486/**
7487 * xmlParseStartTag2:
7488 * @ctxt: an XML parser context
7489 *
7490 * parse a start of tag either for rule element or
7491 * EmptyElement. In both case we don't parse the tag closing chars.
7492 * This routine is called when running SAX2 parsing
7493 *
7494 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7495 *
7496 * [ WFC: Unique Att Spec ]
7497 * No attribute name may appear more than once in the same start-tag or
7498 * empty-element tag.
7499 *
7500 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7501 *
7502 * [ WFC: Unique Att Spec ]
7503 * No attribute name may appear more than once in the same start-tag or
7504 * empty-element tag.
7505 *
7506 * With namespace:
7507 *
7508 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7509 *
7510 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7511 *
7512 * Returns the element name parsed
7513 */
7514
7515static const xmlChar *
7516xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007517 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 const xmlChar *localname;
7519 const xmlChar *prefix;
7520 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007521 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007522 const xmlChar *nsname;
7523 xmlChar *attvalue;
7524 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007526 int nratts, nbatts, nbdef;
7527 int i, j, nbNs, attval;
7528 const xmlChar *base;
7529 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007530
7531 if (RAW != '<') return(NULL);
7532 NEXT1;
7533
7534 /*
7535 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7536 * point since the attribute values may be stored as pointers to
7537 * the buffer and calling SHRINK would destroy them !
7538 * The Shrinking is only possible once the full set of attribute
7539 * callbacks have been done.
7540 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007541reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007542 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007543 base = ctxt->input->base;
7544 cur = ctxt->input->cur - ctxt->input->base;
7545 nbatts = 0;
7546 nratts = 0;
7547 nbdef = 0;
7548 nbNs = 0;
7549 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007550
7551 localname = xmlParseQName(ctxt, &prefix);
7552 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007553 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007555 return(NULL);
7556 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007557 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007558
7559 /*
7560 * Now parse the attributes, it ends up with the ending
7561 *
7562 * (S Attribute)* S?
7563 */
7564 SKIP_BLANKS;
7565 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007566 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007567
7568 while ((RAW != '>') &&
7569 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007570 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007571 const xmlChar *q = CUR_PTR;
7572 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007573 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007574
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007575 attname = xmlParseAttribute2(ctxt, prefix, localname,
7576 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007577 if ((attname != NULL) && (attvalue != NULL)) {
7578 if (len < 0) len = xmlStrlen(attvalue);
7579 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007580 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7581 xmlURIPtr uri;
7582
7583 if (*URL != 0) {
7584 uri = xmlParseURI((const char *) URL);
7585 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007586 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7587 "xmlns: %s not a valid URI\n",
7588 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007589 } else {
7590 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007591 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7592 "xmlns: URI %s is not absolute\n",
7593 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007594 }
7595 xmlFreeURI(uri);
7596 }
7597 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007598 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007599 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007600 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007601 for (j = 1;j <= nbNs;j++)
7602 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7603 break;
7604 if (j <= nbNs)
7605 xmlErrAttributeDup(ctxt, NULL, attname);
7606 else
7607 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007608 if (alloc != 0) xmlFree(attvalue);
7609 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007610 continue;
7611 }
7612 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007613 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7614 xmlURIPtr uri;
7615
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007616 if (attname == ctxt->str_xml) {
7617 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007618 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7619 "xml namespace prefix mapped to wrong URI\n",
7620 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007621 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007622 /*
7623 * Do not keep a namespace definition node
7624 */
7625 if (alloc != 0) xmlFree(attvalue);
7626 SKIP_BLANKS;
7627 continue;
7628 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007629 uri = xmlParseURI((const char *) URL);
7630 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007631 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7632 "xmlns:%s: '%s' is not a valid URI\n",
7633 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007634 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007635 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007636 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7637 "xmlns:%s: URI %s is not absolute\n",
7638 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007639 }
7640 xmlFreeURI(uri);
7641 }
7642
Daniel Veillard0fb18932003-09-07 09:14:37 +00007643 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007644 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007645 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007646 for (j = 1;j <= nbNs;j++)
7647 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7648 break;
7649 if (j <= nbNs)
7650 xmlErrAttributeDup(ctxt, aprefix, attname);
7651 else
7652 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007653 if (alloc != 0) xmlFree(attvalue);
7654 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007655 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007656 continue;
7657 }
7658
7659 /*
7660 * Add the pair to atts
7661 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007662 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7663 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664 if (attvalue[len] == 0)
7665 xmlFree(attvalue);
7666 goto failed;
7667 }
7668 maxatts = ctxt->maxatts;
7669 atts = ctxt->atts;
7670 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007672 atts[nbatts++] = attname;
7673 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007674 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007675 atts[nbatts++] = attvalue;
7676 attvalue += len;
7677 atts[nbatts++] = attvalue;
7678 /*
7679 * tag if some deallocation is needed
7680 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007681 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007682 } else {
7683 if ((attvalue != NULL) && (attvalue[len] == 0))
7684 xmlFree(attvalue);
7685 }
7686
7687failed:
7688
7689 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007690 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007691 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7692 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007693 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7695 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007696 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007697 }
7698 SKIP_BLANKS;
7699 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7700 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007701 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007703 break;
7704 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007705 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007706 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007707 }
7708
Daniel Veillard0fb18932003-09-07 09:14:37 +00007709 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007710 * The attributes defaulting
7711 */
7712 if (ctxt->attsDefault != NULL) {
7713 xmlDefAttrsPtr defaults;
7714
7715 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7716 if (defaults != NULL) {
7717 for (i = 0;i < defaults->nbAttrs;i++) {
7718 attname = defaults->values[4 * i];
7719 aprefix = defaults->values[4 * i + 1];
7720
7721 /*
7722 * special work for namespaces defaulted defs
7723 */
7724 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7725 /*
7726 * check that it's not a defined namespace
7727 */
7728 for (j = 1;j <= nbNs;j++)
7729 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7730 break;
7731 if (j <= nbNs) continue;
7732
7733 nsname = xmlGetNamespace(ctxt, NULL);
7734 if (nsname != defaults->values[4 * i + 2]) {
7735 if (nsPush(ctxt, NULL,
7736 defaults->values[4 * i + 2]) > 0)
7737 nbNs++;
7738 }
7739 } else if (aprefix == ctxt->str_xmlns) {
7740 /*
7741 * check that it's not a defined namespace
7742 */
7743 for (j = 1;j <= nbNs;j++)
7744 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7745 break;
7746 if (j <= nbNs) continue;
7747
7748 nsname = xmlGetNamespace(ctxt, attname);
7749 if (nsname != defaults->values[2]) {
7750 if (nsPush(ctxt, attname,
7751 defaults->values[4 * i + 2]) > 0)
7752 nbNs++;
7753 }
7754 } else {
7755 /*
7756 * check that it's not a defined attribute
7757 */
7758 for (j = 0;j < nbatts;j+=5) {
7759 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7760 break;
7761 }
7762 if (j < nbatts) continue;
7763
7764 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7765 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007766 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007767 }
7768 maxatts = ctxt->maxatts;
7769 atts = ctxt->atts;
7770 }
7771 atts[nbatts++] = attname;
7772 atts[nbatts++] = aprefix;
7773 if (aprefix == NULL)
7774 atts[nbatts++] = NULL;
7775 else
7776 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7777 atts[nbatts++] = defaults->values[4 * i + 2];
7778 atts[nbatts++] = defaults->values[4 * i + 3];
7779 nbdef++;
7780 }
7781 }
7782 }
7783 }
7784
Daniel Veillarde70c8772003-11-25 07:21:18 +00007785 /*
7786 * The attributes checkings
7787 */
7788 for (i = 0; i < nbatts;i += 5) {
7789 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7790 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7791 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7792 "Namespace prefix %s for %s on %s is not defined\n",
7793 atts[i + 1], atts[i], localname);
7794 }
7795 atts[i + 2] = nsname;
7796 /*
7797 * [ WFC: Unique Att Spec ]
7798 * No attribute name may appear more than once in the same
7799 * start-tag or empty-element tag.
7800 * As extended by the Namespace in XML REC.
7801 */
7802 for (j = 0; j < i;j += 5) {
7803 if (atts[i] == atts[j]) {
7804 if (atts[i+1] == atts[j+1]) {
7805 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7806 break;
7807 }
7808 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7809 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7810 "Namespaced Attribute %s in '%s' redefined\n",
7811 atts[i], nsname, NULL);
7812 break;
7813 }
7814 }
7815 }
7816 }
7817
Daniel Veillarde57ec792003-09-10 10:50:59 +00007818 nsname = xmlGetNamespace(ctxt, prefix);
7819 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007820 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7821 "Namespace prefix %s on %s is not defined\n",
7822 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007823 }
7824 *pref = prefix;
7825 *URI = nsname;
7826
7827 /*
7828 * SAX: Start of Element !
7829 */
7830 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7831 (!ctxt->disableSAX)) {
7832 if (nbNs > 0)
7833 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7834 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7835 nbatts / 5, nbdef, atts);
7836 else
7837 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7838 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7839 }
7840
7841 /*
7842 * Free up attribute allocated strings if needed
7843 */
7844 if (attval != 0) {
7845 for (i = 3,j = 0; j < nratts;i += 5,j++)
7846 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7847 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007848 }
7849
7850 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007851
7852base_changed:
7853 /*
7854 * the attribute strings are valid iif the base didn't changed
7855 */
7856 if (attval != 0) {
7857 for (i = 3,j = 0; j < nratts;i += 5,j++)
7858 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7859 xmlFree((xmlChar *) atts[i]);
7860 }
7861 ctxt->input->cur = ctxt->input->base + cur;
7862 if (ctxt->wellFormed == 1) {
7863 goto reparse;
7864 }
7865 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007866}
7867
7868/**
7869 * xmlParseEndTag2:
7870 * @ctxt: an XML parser context
7871 * @line: line of the start tag
7872 * @nsNr: number of namespaces on the start tag
7873 *
7874 * parse an end of tag
7875 *
7876 * [42] ETag ::= '</' Name S? '>'
7877 *
7878 * With namespace
7879 *
7880 * [NS 9] ETag ::= '</' QName S? '>'
7881 */
7882
7883static void
7884xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007885 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007886 const xmlChar *name;
7887
7888 GROW;
7889 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007890 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007891 return;
7892 }
7893 SKIP(2);
7894
William M. Brack13dfa872004-09-18 04:52:08 +00007895 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007896 if (ctxt->input->cur[tlen] == '>') {
7897 ctxt->input->cur += tlen + 1;
7898 goto done;
7899 }
7900 ctxt->input->cur += tlen;
7901 name = (xmlChar*)1;
7902 } else {
7903 if (prefix == NULL)
7904 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7905 else
7906 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7907 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908
7909 /*
7910 * We should definitely be at the ending "S? '>'" part
7911 */
7912 GROW;
7913 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007914 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007915 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007916 } else
7917 NEXT1;
7918
7919 /*
7920 * [ WFC: Element Type Match ]
7921 * The Name in an element's end-tag must match the element type in the
7922 * start-tag.
7923 *
7924 */
7925 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007926 if (name == NULL) name = BAD_CAST "unparseable";
7927 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007928 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007929 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007930 }
7931
7932 /*
7933 * SAX: End of Tag
7934 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007935done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007936 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7937 (!ctxt->disableSAX))
7938 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7939
Daniel Veillard0fb18932003-09-07 09:14:37 +00007940 spacePop(ctxt);
7941 if (nsNr != 0)
7942 nsPop(ctxt, nsNr);
7943 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007944}
7945
7946/**
Owen Taylor3473f882001-02-23 17:55:21 +00007947 * xmlParseCDSect:
7948 * @ctxt: an XML parser context
7949 *
7950 * Parse escaped pure raw content.
7951 *
7952 * [18] CDSect ::= CDStart CData CDEnd
7953 *
7954 * [19] CDStart ::= '<![CDATA['
7955 *
7956 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7957 *
7958 * [21] CDEnd ::= ']]>'
7959 */
7960void
7961xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7962 xmlChar *buf = NULL;
7963 int len = 0;
7964 int size = XML_PARSER_BUFFER_SIZE;
7965 int r, rl;
7966 int s, sl;
7967 int cur, l;
7968 int count = 0;
7969
Daniel Veillard8f597c32003-10-06 08:19:27 +00007970 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007971 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007972 SKIP(9);
7973 } else
7974 return;
7975
7976 ctxt->instate = XML_PARSER_CDATA_SECTION;
7977 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007978 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007979 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007980 ctxt->instate = XML_PARSER_CONTENT;
7981 return;
7982 }
7983 NEXTL(rl);
7984 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007985 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007986 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007987 ctxt->instate = XML_PARSER_CONTENT;
7988 return;
7989 }
7990 NEXTL(sl);
7991 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007992 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007993 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007994 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007995 return;
7996 }
William M. Brack871611b2003-10-18 04:53:14 +00007997 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007998 ((r != ']') || (s != ']') || (cur != '>'))) {
7999 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008000 xmlChar *tmp;
8001
Owen Taylor3473f882001-02-23 17:55:21 +00008002 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008003 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8004 if (tmp == NULL) {
8005 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008006 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008007 return;
8008 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008009 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008010 }
8011 COPY_BUF(rl,buf,len,r);
8012 r = s;
8013 rl = sl;
8014 s = cur;
8015 sl = l;
8016 count++;
8017 if (count > 50) {
8018 GROW;
8019 count = 0;
8020 }
8021 NEXTL(l);
8022 cur = CUR_CHAR(l);
8023 }
8024 buf[len] = 0;
8025 ctxt->instate = XML_PARSER_CONTENT;
8026 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008027 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008028 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008029 xmlFree(buf);
8030 return;
8031 }
8032 NEXTL(l);
8033
8034 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008035 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008036 */
8037 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8038 if (ctxt->sax->cdataBlock != NULL)
8039 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008040 else if (ctxt->sax->characters != NULL)
8041 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008042 }
8043 xmlFree(buf);
8044}
8045
8046/**
8047 * xmlParseContent:
8048 * @ctxt: an XML parser context
8049 *
8050 * Parse a content:
8051 *
8052 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8053 */
8054
8055void
8056xmlParseContent(xmlParserCtxtPtr ctxt) {
8057 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008058 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008059 ((RAW != '<') || (NXT(1) != '/'))) {
8060 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008061 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008062 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008063
8064 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008065 * First case : a Processing Instruction.
8066 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008067 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008068 xmlParsePI(ctxt);
8069 }
8070
8071 /*
8072 * Second case : a CDSection
8073 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008074 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008075 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008076 xmlParseCDSect(ctxt);
8077 }
8078
8079 /*
8080 * Third case : a comment
8081 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008082 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008083 (NXT(2) == '-') && (NXT(3) == '-')) {
8084 xmlParseComment(ctxt);
8085 ctxt->instate = XML_PARSER_CONTENT;
8086 }
8087
8088 /*
8089 * Fourth case : a sub-element.
8090 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008091 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008092 xmlParseElement(ctxt);
8093 }
8094
8095 /*
8096 * Fifth case : a reference. If if has not been resolved,
8097 * parsing returns it's Name, create the node
8098 */
8099
Daniel Veillard21a0f912001-02-25 19:54:14 +00008100 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008101 xmlParseReference(ctxt);
8102 }
8103
8104 /*
8105 * Last case, text. Note that References are handled directly.
8106 */
8107 else {
8108 xmlParseCharData(ctxt, 0);
8109 }
8110
8111 GROW;
8112 /*
8113 * Pop-up of finished entities.
8114 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008115 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008116 xmlPopInput(ctxt);
8117 SHRINK;
8118
Daniel Veillardfdc91562002-07-01 21:52:03 +00008119 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008120 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8121 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008122 ctxt->instate = XML_PARSER_EOF;
8123 break;
8124 }
8125 }
8126}
8127
8128/**
8129 * xmlParseElement:
8130 * @ctxt: an XML parser context
8131 *
8132 * parse an XML element, this is highly recursive
8133 *
8134 * [39] element ::= EmptyElemTag | STag content ETag
8135 *
8136 * [ WFC: Element Type Match ]
8137 * The Name in an element's end-tag must match the element type in the
8138 * start-tag.
8139 *
Owen Taylor3473f882001-02-23 17:55:21 +00008140 */
8141
8142void
8143xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008144 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008145 const xmlChar *prefix;
8146 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008147 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008148 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008149 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008150 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008151
8152 /* Capture start position */
8153 if (ctxt->record_info) {
8154 node_info.begin_pos = ctxt->input->consumed +
8155 (CUR_PTR - ctxt->input->base);
8156 node_info.begin_line = ctxt->input->line;
8157 }
8158
8159 if (ctxt->spaceNr == 0)
8160 spacePush(ctxt, -1);
8161 else
8162 spacePush(ctxt, *ctxt->space);
8163
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008164 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008165#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008166 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008167#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008168 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008169#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008170 else
8171 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008172#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008173 if (name == NULL) {
8174 spacePop(ctxt);
8175 return;
8176 }
8177 namePush(ctxt, name);
8178 ret = ctxt->node;
8179
Daniel Veillard4432df22003-09-28 18:58:27 +00008180#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008181 /*
8182 * [ VC: Root Element Type ]
8183 * The Name in the document type declaration must match the element
8184 * type of the root element.
8185 */
8186 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8187 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8188 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008189#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008190
8191 /*
8192 * Check for an Empty Element.
8193 */
8194 if ((RAW == '/') && (NXT(1) == '>')) {
8195 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008196 if (ctxt->sax2) {
8197 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8198 (!ctxt->disableSAX))
8199 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008200#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 } else {
8202 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8203 (!ctxt->disableSAX))
8204 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008205#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008206 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 namePop(ctxt);
8208 spacePop(ctxt);
8209 if (nsNr != ctxt->nsNr)
8210 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008211 if ( ret != NULL && ctxt->record_info ) {
8212 node_info.end_pos = ctxt->input->consumed +
8213 (CUR_PTR - ctxt->input->base);
8214 node_info.end_line = ctxt->input->line;
8215 node_info.node = ret;
8216 xmlParserAddNodeInfo(ctxt, &node_info);
8217 }
8218 return;
8219 }
8220 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008221 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008222 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008223 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8224 "Couldn't find end of Start Tag %s line %d\n",
8225 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008226
8227 /*
8228 * end of parsing of this node.
8229 */
8230 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008231 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008232 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 if (nsNr != ctxt->nsNr)
8234 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008235
8236 /*
8237 * Capture end position and add node
8238 */
8239 if ( ret != NULL && ctxt->record_info ) {
8240 node_info.end_pos = ctxt->input->consumed +
8241 (CUR_PTR - ctxt->input->base);
8242 node_info.end_line = ctxt->input->line;
8243 node_info.node = ret;
8244 xmlParserAddNodeInfo(ctxt, &node_info);
8245 }
8246 return;
8247 }
8248
8249 /*
8250 * Parse the content of the element:
8251 */
8252 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008253 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008254 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008255 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008256 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008257
8258 /*
8259 * end of parsing of this node.
8260 */
8261 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008262 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008263 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264 if (nsNr != ctxt->nsNr)
8265 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008266 return;
8267 }
8268
8269 /*
8270 * parse the end of tag: '</' should be here.
8271 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008272 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008273 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008274 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008275 }
8276#ifdef LIBXML_SAX1_ENABLED
8277 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008279#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008280
8281 /*
8282 * Capture end position and add node
8283 */
8284 if ( ret != NULL && ctxt->record_info ) {
8285 node_info.end_pos = ctxt->input->consumed +
8286 (CUR_PTR - ctxt->input->base);
8287 node_info.end_line = ctxt->input->line;
8288 node_info.node = ret;
8289 xmlParserAddNodeInfo(ctxt, &node_info);
8290 }
8291}
8292
8293/**
8294 * xmlParseVersionNum:
8295 * @ctxt: an XML parser context
8296 *
8297 * parse the XML version value.
8298 *
8299 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8300 *
8301 * Returns the string giving the XML version number, or NULL
8302 */
8303xmlChar *
8304xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8305 xmlChar *buf = NULL;
8306 int len = 0;
8307 int size = 10;
8308 xmlChar cur;
8309
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008310 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008311 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008312 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008313 return(NULL);
8314 }
8315 cur = CUR;
8316 while (((cur >= 'a') && (cur <= 'z')) ||
8317 ((cur >= 'A') && (cur <= 'Z')) ||
8318 ((cur >= '0') && (cur <= '9')) ||
8319 (cur == '_') || (cur == '.') ||
8320 (cur == ':') || (cur == '-')) {
8321 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008322 xmlChar *tmp;
8323
Owen Taylor3473f882001-02-23 17:55:21 +00008324 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008325 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8326 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008327 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008328 return(NULL);
8329 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008330 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008331 }
8332 buf[len++] = cur;
8333 NEXT;
8334 cur=CUR;
8335 }
8336 buf[len] = 0;
8337 return(buf);
8338}
8339
8340/**
8341 * xmlParseVersionInfo:
8342 * @ctxt: an XML parser context
8343 *
8344 * parse the XML version.
8345 *
8346 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8347 *
8348 * [25] Eq ::= S? '=' S?
8349 *
8350 * Returns the version string, e.g. "1.0"
8351 */
8352
8353xmlChar *
8354xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8355 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008356
Daniel Veillarda07050d2003-10-19 14:46:32 +00008357 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008358 SKIP(7);
8359 SKIP_BLANKS;
8360 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008361 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008362 return(NULL);
8363 }
8364 NEXT;
8365 SKIP_BLANKS;
8366 if (RAW == '"') {
8367 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008368 version = xmlParseVersionNum(ctxt);
8369 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008370 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008371 } else
8372 NEXT;
8373 } else if (RAW == '\''){
8374 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008375 version = xmlParseVersionNum(ctxt);
8376 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008377 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008378 } else
8379 NEXT;
8380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008381 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008382 }
8383 }
8384 return(version);
8385}
8386
8387/**
8388 * xmlParseEncName:
8389 * @ctxt: an XML parser context
8390 *
8391 * parse the XML encoding name
8392 *
8393 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8394 *
8395 * Returns the encoding name value or NULL
8396 */
8397xmlChar *
8398xmlParseEncName(xmlParserCtxtPtr ctxt) {
8399 xmlChar *buf = NULL;
8400 int len = 0;
8401 int size = 10;
8402 xmlChar cur;
8403
8404 cur = CUR;
8405 if (((cur >= 'a') && (cur <= 'z')) ||
8406 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008407 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008408 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008409 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008410 return(NULL);
8411 }
8412
8413 buf[len++] = cur;
8414 NEXT;
8415 cur = CUR;
8416 while (((cur >= 'a') && (cur <= 'z')) ||
8417 ((cur >= 'A') && (cur <= 'Z')) ||
8418 ((cur >= '0') && (cur <= '9')) ||
8419 (cur == '.') || (cur == '_') ||
8420 (cur == '-')) {
8421 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008422 xmlChar *tmp;
8423
Owen Taylor3473f882001-02-23 17:55:21 +00008424 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008425 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8426 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008427 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008428 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008429 return(NULL);
8430 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008431 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008432 }
8433 buf[len++] = cur;
8434 NEXT;
8435 cur = CUR;
8436 if (cur == 0) {
8437 SHRINK;
8438 GROW;
8439 cur = CUR;
8440 }
8441 }
8442 buf[len] = 0;
8443 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008444 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008445 }
8446 return(buf);
8447}
8448
8449/**
8450 * xmlParseEncodingDecl:
8451 * @ctxt: an XML parser context
8452 *
8453 * parse the XML encoding declaration
8454 *
8455 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8456 *
8457 * this setups the conversion filters.
8458 *
8459 * Returns the encoding value or NULL
8460 */
8461
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008462const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008463xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8464 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008465
8466 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008467 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008468 SKIP(8);
8469 SKIP_BLANKS;
8470 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008471 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008472 return(NULL);
8473 }
8474 NEXT;
8475 SKIP_BLANKS;
8476 if (RAW == '"') {
8477 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008478 encoding = xmlParseEncName(ctxt);
8479 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008480 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008481 } else
8482 NEXT;
8483 } else if (RAW == '\''){
8484 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008485 encoding = xmlParseEncName(ctxt);
8486 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008487 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008488 } else
8489 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008490 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008491 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008492 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008493 /*
8494 * UTF-16 encoding stwich has already taken place at this stage,
8495 * more over the little-endian/big-endian selection is already done
8496 */
8497 if ((encoding != NULL) &&
8498 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8499 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008500 if (ctxt->encoding != NULL)
8501 xmlFree((xmlChar *) ctxt->encoding);
8502 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008503 }
8504 /*
8505 * UTF-8 encoding is handled natively
8506 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008507 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008508 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8509 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008510 if (ctxt->encoding != NULL)
8511 xmlFree((xmlChar *) ctxt->encoding);
8512 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008513 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008514 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008515 xmlCharEncodingHandlerPtr handler;
8516
8517 if (ctxt->input->encoding != NULL)
8518 xmlFree((xmlChar *) ctxt->input->encoding);
8519 ctxt->input->encoding = encoding;
8520
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008521 handler = xmlFindCharEncodingHandler((const char *) encoding);
8522 if (handler != NULL) {
8523 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008524 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008525 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008526 "Unsupported encoding %s\n", encoding);
8527 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008528 }
8529 }
8530 }
8531 return(encoding);
8532}
8533
8534/**
8535 * xmlParseSDDecl:
8536 * @ctxt: an XML parser context
8537 *
8538 * parse the XML standalone declaration
8539 *
8540 * [32] SDDecl ::= S 'standalone' Eq
8541 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8542 *
8543 * [ VC: Standalone Document Declaration ]
8544 * TODO The standalone document declaration must have the value "no"
8545 * if any external markup declarations contain declarations of:
8546 * - attributes with default values, if elements to which these
8547 * attributes apply appear in the document without specifications
8548 * of values for these attributes, or
8549 * - entities (other than amp, lt, gt, apos, quot), if references
8550 * to those entities appear in the document, or
8551 * - attributes with values subject to normalization, where the
8552 * attribute appears in the document with a value which will change
8553 * as a result of normalization, or
8554 * - element types with element content, if white space occurs directly
8555 * within any instance of those types.
8556 *
8557 * Returns 1 if standalone, 0 otherwise
8558 */
8559
8560int
8561xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8562 int standalone = -1;
8563
8564 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008565 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008566 SKIP(10);
8567 SKIP_BLANKS;
8568 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008569 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008570 return(standalone);
8571 }
8572 NEXT;
8573 SKIP_BLANKS;
8574 if (RAW == '\''){
8575 NEXT;
8576 if ((RAW == 'n') && (NXT(1) == 'o')) {
8577 standalone = 0;
8578 SKIP(2);
8579 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8580 (NXT(2) == 's')) {
8581 standalone = 1;
8582 SKIP(3);
8583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008584 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008585 }
8586 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008587 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008588 } else
8589 NEXT;
8590 } else if (RAW == '"'){
8591 NEXT;
8592 if ((RAW == 'n') && (NXT(1) == 'o')) {
8593 standalone = 0;
8594 SKIP(2);
8595 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8596 (NXT(2) == 's')) {
8597 standalone = 1;
8598 SKIP(3);
8599 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008600 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008601 }
8602 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008603 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008604 } else
8605 NEXT;
8606 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008607 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008608 }
8609 }
8610 return(standalone);
8611}
8612
8613/**
8614 * xmlParseXMLDecl:
8615 * @ctxt: an XML parser context
8616 *
8617 * parse an XML declaration header
8618 *
8619 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8620 */
8621
8622void
8623xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8624 xmlChar *version;
8625
8626 /*
8627 * We know that '<?xml' is here.
8628 */
8629 SKIP(5);
8630
William M. Brack76e95df2003-10-18 16:20:14 +00008631 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008632 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8633 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008634 }
8635 SKIP_BLANKS;
8636
8637 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008638 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008639 */
8640 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008641 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008642 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008643 } else {
8644 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8645 /*
8646 * TODO: Blueberry should be detected here
8647 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008648 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8649 "Unsupported version '%s'\n",
8650 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008651 }
8652 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008653 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008654 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008655 }
Owen Taylor3473f882001-02-23 17:55:21 +00008656
8657 /*
8658 * We may have the encoding declaration
8659 */
William M. Brack76e95df2003-10-18 16:20:14 +00008660 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008661 if ((RAW == '?') && (NXT(1) == '>')) {
8662 SKIP(2);
8663 return;
8664 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008666 }
8667 xmlParseEncodingDecl(ctxt);
8668 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8669 /*
8670 * The XML REC instructs us to stop parsing right here
8671 */
8672 return;
8673 }
8674
8675 /*
8676 * We may have the standalone status.
8677 */
William M. Brack76e95df2003-10-18 16:20:14 +00008678 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008679 if ((RAW == '?') && (NXT(1) == '>')) {
8680 SKIP(2);
8681 return;
8682 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008684 }
8685 SKIP_BLANKS;
8686 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8687
8688 SKIP_BLANKS;
8689 if ((RAW == '?') && (NXT(1) == '>')) {
8690 SKIP(2);
8691 } else if (RAW == '>') {
8692 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008693 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008694 NEXT;
8695 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008696 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008697 MOVETO_ENDTAG(CUR_PTR);
8698 NEXT;
8699 }
8700}
8701
8702/**
8703 * xmlParseMisc:
8704 * @ctxt: an XML parser context
8705 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008706 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008707 *
8708 * [27] Misc ::= Comment | PI | S
8709 */
8710
8711void
8712xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008713 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008714 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008715 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008716 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008717 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008718 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008719 NEXT;
8720 } else
8721 xmlParseComment(ctxt);
8722 }
8723}
8724
8725/**
8726 * xmlParseDocument:
8727 * @ctxt: an XML parser context
8728 *
8729 * parse an XML document (and build a tree if using the standard SAX
8730 * interface).
8731 *
8732 * [1] document ::= prolog element Misc*
8733 *
8734 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8735 *
8736 * Returns 0, -1 in case of error. the parser context is augmented
8737 * as a result of the parsing.
8738 */
8739
8740int
8741xmlParseDocument(xmlParserCtxtPtr ctxt) {
8742 xmlChar start[4];
8743 xmlCharEncoding enc;
8744
8745 xmlInitParser();
8746
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008747 if ((ctxt == NULL) || (ctxt->input == NULL))
8748 return(-1);
8749
Owen Taylor3473f882001-02-23 17:55:21 +00008750 GROW;
8751
8752 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008753 * SAX: detecting the level.
8754 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008755 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756
8757 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008758 * SAX: beginning of the document processing.
8759 */
8760 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8761 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8762
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008763 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8764 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008765 /*
8766 * Get the 4 first bytes and decode the charset
8767 * if enc != XML_CHAR_ENCODING_NONE
8768 * plug some encoding conversion routines.
8769 */
8770 start[0] = RAW;
8771 start[1] = NXT(1);
8772 start[2] = NXT(2);
8773 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008774 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008775 if (enc != XML_CHAR_ENCODING_NONE) {
8776 xmlSwitchEncoding(ctxt, enc);
8777 }
Owen Taylor3473f882001-02-23 17:55:21 +00008778 }
8779
8780
8781 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008782 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008783 }
8784
8785 /*
8786 * Check for the XMLDecl in the Prolog.
8787 */
8788 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008789 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008790
8791 /*
8792 * Note that we will switch encoding on the fly.
8793 */
8794 xmlParseXMLDecl(ctxt);
8795 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8796 /*
8797 * The XML REC instructs us to stop parsing right here
8798 */
8799 return(-1);
8800 }
8801 ctxt->standalone = ctxt->input->standalone;
8802 SKIP_BLANKS;
8803 } else {
8804 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8805 }
8806 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8807 ctxt->sax->startDocument(ctxt->userData);
8808
8809 /*
8810 * The Misc part of the Prolog
8811 */
8812 GROW;
8813 xmlParseMisc(ctxt);
8814
8815 /*
8816 * Then possibly doc type declaration(s) and more Misc
8817 * (doctypedecl Misc*)?
8818 */
8819 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008820 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008821
8822 ctxt->inSubset = 1;
8823 xmlParseDocTypeDecl(ctxt);
8824 if (RAW == '[') {
8825 ctxt->instate = XML_PARSER_DTD;
8826 xmlParseInternalSubset(ctxt);
8827 }
8828
8829 /*
8830 * Create and update the external subset.
8831 */
8832 ctxt->inSubset = 2;
8833 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8834 (!ctxt->disableSAX))
8835 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8836 ctxt->extSubSystem, ctxt->extSubURI);
8837 ctxt->inSubset = 0;
8838
8839
8840 ctxt->instate = XML_PARSER_PROLOG;
8841 xmlParseMisc(ctxt);
8842 }
8843
8844 /*
8845 * Time to start parsing the tree itself
8846 */
8847 GROW;
8848 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008849 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8850 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008851 } else {
8852 ctxt->instate = XML_PARSER_CONTENT;
8853 xmlParseElement(ctxt);
8854 ctxt->instate = XML_PARSER_EPILOG;
8855
8856
8857 /*
8858 * The Misc part at the end
8859 */
8860 xmlParseMisc(ctxt);
8861
Daniel Veillard561b7f82002-03-20 21:55:57 +00008862 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008863 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008864 }
8865 ctxt->instate = XML_PARSER_EOF;
8866 }
8867
8868 /*
8869 * SAX: end of the document processing.
8870 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008871 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008872 ctxt->sax->endDocument(ctxt->userData);
8873
Daniel Veillard5997aca2002-03-18 18:36:20 +00008874 /*
8875 * Remove locally kept entity definitions if the tree was not built
8876 */
8877 if ((ctxt->myDoc != NULL) &&
8878 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8879 xmlFreeDoc(ctxt->myDoc);
8880 ctxt->myDoc = NULL;
8881 }
8882
Daniel Veillardc7612992002-02-17 22:47:37 +00008883 if (! ctxt->wellFormed) {
8884 ctxt->valid = 0;
8885 return(-1);
8886 }
Owen Taylor3473f882001-02-23 17:55:21 +00008887 return(0);
8888}
8889
8890/**
8891 * xmlParseExtParsedEnt:
8892 * @ctxt: an XML parser context
8893 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008894 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008895 * An external general parsed entity is well-formed if it matches the
8896 * production labeled extParsedEnt.
8897 *
8898 * [78] extParsedEnt ::= TextDecl? content
8899 *
8900 * Returns 0, -1 in case of error. the parser context is augmented
8901 * as a result of the parsing.
8902 */
8903
8904int
8905xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8906 xmlChar start[4];
8907 xmlCharEncoding enc;
8908
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008909 if ((ctxt == NULL) || (ctxt->input == NULL))
8910 return(-1);
8911
Owen Taylor3473f882001-02-23 17:55:21 +00008912 xmlDefaultSAXHandlerInit();
8913
Daniel Veillard309f81d2003-09-23 09:02:53 +00008914 xmlDetectSAX2(ctxt);
8915
Owen Taylor3473f882001-02-23 17:55:21 +00008916 GROW;
8917
8918 /*
8919 * SAX: beginning of the document processing.
8920 */
8921 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8922 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8923
8924 /*
8925 * Get the 4 first bytes and decode the charset
8926 * if enc != XML_CHAR_ENCODING_NONE
8927 * plug some encoding conversion routines.
8928 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008929 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8930 start[0] = RAW;
8931 start[1] = NXT(1);
8932 start[2] = NXT(2);
8933 start[3] = NXT(3);
8934 enc = xmlDetectCharEncoding(start, 4);
8935 if (enc != XML_CHAR_ENCODING_NONE) {
8936 xmlSwitchEncoding(ctxt, enc);
8937 }
Owen Taylor3473f882001-02-23 17:55:21 +00008938 }
8939
8940
8941 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008942 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008943 }
8944
8945 /*
8946 * Check for the XMLDecl in the Prolog.
8947 */
8948 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008949 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008950
8951 /*
8952 * Note that we will switch encoding on the fly.
8953 */
8954 xmlParseXMLDecl(ctxt);
8955 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8956 /*
8957 * The XML REC instructs us to stop parsing right here
8958 */
8959 return(-1);
8960 }
8961 SKIP_BLANKS;
8962 } else {
8963 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8964 }
8965 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8966 ctxt->sax->startDocument(ctxt->userData);
8967
8968 /*
8969 * Doing validity checking on chunk doesn't make sense
8970 */
8971 ctxt->instate = XML_PARSER_CONTENT;
8972 ctxt->validate = 0;
8973 ctxt->loadsubset = 0;
8974 ctxt->depth = 0;
8975
8976 xmlParseContent(ctxt);
8977
8978 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008979 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008980 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008981 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008982 }
8983
8984 /*
8985 * SAX: end of the document processing.
8986 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008987 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008988 ctxt->sax->endDocument(ctxt->userData);
8989
8990 if (! ctxt->wellFormed) return(-1);
8991 return(0);
8992}
8993
Daniel Veillard73b013f2003-09-30 12:36:01 +00008994#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008995/************************************************************************
8996 * *
8997 * Progressive parsing interfaces *
8998 * *
8999 ************************************************************************/
9000
9001/**
9002 * xmlParseLookupSequence:
9003 * @ctxt: an XML parser context
9004 * @first: the first char to lookup
9005 * @next: the next char to lookup or zero
9006 * @third: the next char to lookup or zero
9007 *
9008 * Try to find if a sequence (first, next, third) or just (first next) or
9009 * (first) is available in the input stream.
9010 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9011 * to avoid rescanning sequences of bytes, it DOES change the state of the
9012 * parser, do not use liberally.
9013 *
9014 * Returns the index to the current parsing point if the full sequence
9015 * is available, -1 otherwise.
9016 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009017static int
Owen Taylor3473f882001-02-23 17:55:21 +00009018xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9019 xmlChar next, xmlChar third) {
9020 int base, len;
9021 xmlParserInputPtr in;
9022 const xmlChar *buf;
9023
9024 in = ctxt->input;
9025 if (in == NULL) return(-1);
9026 base = in->cur - in->base;
9027 if (base < 0) return(-1);
9028 if (ctxt->checkIndex > base)
9029 base = ctxt->checkIndex;
9030 if (in->buf == NULL) {
9031 buf = in->base;
9032 len = in->length;
9033 } else {
9034 buf = in->buf->buffer->content;
9035 len = in->buf->buffer->use;
9036 }
9037 /* take into account the sequence length */
9038 if (third) len -= 2;
9039 else if (next) len --;
9040 for (;base < len;base++) {
9041 if (buf[base] == first) {
9042 if (third != 0) {
9043 if ((buf[base + 1] != next) ||
9044 (buf[base + 2] != third)) continue;
9045 } else if (next != 0) {
9046 if (buf[base + 1] != next) continue;
9047 }
9048 ctxt->checkIndex = 0;
9049#ifdef DEBUG_PUSH
9050 if (next == 0)
9051 xmlGenericError(xmlGenericErrorContext,
9052 "PP: lookup '%c' found at %d\n",
9053 first, base);
9054 else if (third == 0)
9055 xmlGenericError(xmlGenericErrorContext,
9056 "PP: lookup '%c%c' found at %d\n",
9057 first, next, base);
9058 else
9059 xmlGenericError(xmlGenericErrorContext,
9060 "PP: lookup '%c%c%c' found at %d\n",
9061 first, next, third, base);
9062#endif
9063 return(base - (in->cur - in->base));
9064 }
9065 }
9066 ctxt->checkIndex = base;
9067#ifdef DEBUG_PUSH
9068 if (next == 0)
9069 xmlGenericError(xmlGenericErrorContext,
9070 "PP: lookup '%c' failed\n", first);
9071 else if (third == 0)
9072 xmlGenericError(xmlGenericErrorContext,
9073 "PP: lookup '%c%c' failed\n", first, next);
9074 else
9075 xmlGenericError(xmlGenericErrorContext,
9076 "PP: lookup '%c%c%c' failed\n", first, next, third);
9077#endif
9078 return(-1);
9079}
9080
9081/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009082 * xmlParseGetLasts:
9083 * @ctxt: an XML parser context
9084 * @lastlt: pointer to store the last '<' from the input
9085 * @lastgt: pointer to store the last '>' from the input
9086 *
9087 * Lookup the last < and > in the current chunk
9088 */
9089static void
9090xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9091 const xmlChar **lastgt) {
9092 const xmlChar *tmp;
9093
9094 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9095 xmlGenericError(xmlGenericErrorContext,
9096 "Internal error: xmlParseGetLasts\n");
9097 return;
9098 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009099 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009100 tmp = ctxt->input->end;
9101 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009102 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009103 if (tmp < ctxt->input->base) {
9104 *lastlt = NULL;
9105 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009106 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009107 *lastlt = tmp;
9108 tmp++;
9109 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9110 if (*tmp == '\'') {
9111 tmp++;
9112 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9113 if (tmp < ctxt->input->end) tmp++;
9114 } else if (*tmp == '"') {
9115 tmp++;
9116 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9117 if (tmp < ctxt->input->end) tmp++;
9118 } else
9119 tmp++;
9120 }
9121 if (tmp < ctxt->input->end)
9122 *lastgt = tmp;
9123 else {
9124 tmp = *lastlt;
9125 tmp--;
9126 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9127 if (tmp >= ctxt->input->base)
9128 *lastgt = tmp;
9129 else
9130 *lastgt = NULL;
9131 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009133 } else {
9134 *lastlt = NULL;
9135 *lastgt = NULL;
9136 }
9137}
9138/**
Owen Taylor3473f882001-02-23 17:55:21 +00009139 * xmlParseTryOrFinish:
9140 * @ctxt: an XML parser context
9141 * @terminate: last chunk indicator
9142 *
9143 * Try to progress on parsing
9144 *
9145 * Returns zero if no parsing was possible
9146 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009147static int
Owen Taylor3473f882001-02-23 17:55:21 +00009148xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9149 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009150 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009151 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009152 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009153
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009154 if (ctxt->input == NULL)
9155 return(0);
9156
Owen Taylor3473f882001-02-23 17:55:21 +00009157#ifdef DEBUG_PUSH
9158 switch (ctxt->instate) {
9159 case XML_PARSER_EOF:
9160 xmlGenericError(xmlGenericErrorContext,
9161 "PP: try EOF\n"); break;
9162 case XML_PARSER_START:
9163 xmlGenericError(xmlGenericErrorContext,
9164 "PP: try START\n"); break;
9165 case XML_PARSER_MISC:
9166 xmlGenericError(xmlGenericErrorContext,
9167 "PP: try MISC\n");break;
9168 case XML_PARSER_COMMENT:
9169 xmlGenericError(xmlGenericErrorContext,
9170 "PP: try COMMENT\n");break;
9171 case XML_PARSER_PROLOG:
9172 xmlGenericError(xmlGenericErrorContext,
9173 "PP: try PROLOG\n");break;
9174 case XML_PARSER_START_TAG:
9175 xmlGenericError(xmlGenericErrorContext,
9176 "PP: try START_TAG\n");break;
9177 case XML_PARSER_CONTENT:
9178 xmlGenericError(xmlGenericErrorContext,
9179 "PP: try CONTENT\n");break;
9180 case XML_PARSER_CDATA_SECTION:
9181 xmlGenericError(xmlGenericErrorContext,
9182 "PP: try CDATA_SECTION\n");break;
9183 case XML_PARSER_END_TAG:
9184 xmlGenericError(xmlGenericErrorContext,
9185 "PP: try END_TAG\n");break;
9186 case XML_PARSER_ENTITY_DECL:
9187 xmlGenericError(xmlGenericErrorContext,
9188 "PP: try ENTITY_DECL\n");break;
9189 case XML_PARSER_ENTITY_VALUE:
9190 xmlGenericError(xmlGenericErrorContext,
9191 "PP: try ENTITY_VALUE\n");break;
9192 case XML_PARSER_ATTRIBUTE_VALUE:
9193 xmlGenericError(xmlGenericErrorContext,
9194 "PP: try ATTRIBUTE_VALUE\n");break;
9195 case XML_PARSER_DTD:
9196 xmlGenericError(xmlGenericErrorContext,
9197 "PP: try DTD\n");break;
9198 case XML_PARSER_EPILOG:
9199 xmlGenericError(xmlGenericErrorContext,
9200 "PP: try EPILOG\n");break;
9201 case XML_PARSER_PI:
9202 xmlGenericError(xmlGenericErrorContext,
9203 "PP: try PI\n");break;
9204 case XML_PARSER_IGNORE:
9205 xmlGenericError(xmlGenericErrorContext,
9206 "PP: try IGNORE\n");break;
9207 }
9208#endif
9209
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009210 if ((ctxt->input != NULL) &&
9211 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009212 xmlSHRINK(ctxt);
9213 ctxt->checkIndex = 0;
9214 }
9215 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009216
Daniel Veillarda880b122003-04-21 21:36:41 +00009217 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009218 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009219 return(0);
9220
9221
Owen Taylor3473f882001-02-23 17:55:21 +00009222 /*
9223 * Pop-up of finished entities.
9224 */
9225 while ((RAW == 0) && (ctxt->inputNr > 1))
9226 xmlPopInput(ctxt);
9227
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009228 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009229 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009230 avail = ctxt->input->length -
9231 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009232 else {
9233 /*
9234 * If we are operating on converted input, try to flush
9235 * remainng chars to avoid them stalling in the non-converted
9236 * buffer.
9237 */
9238 if ((ctxt->input->buf->raw != NULL) &&
9239 (ctxt->input->buf->raw->use > 0)) {
9240 int base = ctxt->input->base -
9241 ctxt->input->buf->buffer->content;
9242 int current = ctxt->input->cur - ctxt->input->base;
9243
9244 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9245 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9246 ctxt->input->cur = ctxt->input->base + current;
9247 ctxt->input->end =
9248 &ctxt->input->buf->buffer->content[
9249 ctxt->input->buf->buffer->use];
9250 }
9251 avail = ctxt->input->buf->buffer->use -
9252 (ctxt->input->cur - ctxt->input->base);
9253 }
Owen Taylor3473f882001-02-23 17:55:21 +00009254 if (avail < 1)
9255 goto done;
9256 switch (ctxt->instate) {
9257 case XML_PARSER_EOF:
9258 /*
9259 * Document parsing is done !
9260 */
9261 goto done;
9262 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009263 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9264 xmlChar start[4];
9265 xmlCharEncoding enc;
9266
9267 /*
9268 * Very first chars read from the document flow.
9269 */
9270 if (avail < 4)
9271 goto done;
9272
9273 /*
9274 * Get the 4 first bytes and decode the charset
9275 * if enc != XML_CHAR_ENCODING_NONE
9276 * plug some encoding conversion routines.
9277 */
9278 start[0] = RAW;
9279 start[1] = NXT(1);
9280 start[2] = NXT(2);
9281 start[3] = NXT(3);
9282 enc = xmlDetectCharEncoding(start, 4);
9283 if (enc != XML_CHAR_ENCODING_NONE) {
9284 xmlSwitchEncoding(ctxt, enc);
9285 }
9286 break;
9287 }
Owen Taylor3473f882001-02-23 17:55:21 +00009288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009289 if (avail < 2)
9290 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009291 cur = ctxt->input->cur[0];
9292 next = ctxt->input->cur[1];
9293 if (cur == 0) {
9294 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9295 ctxt->sax->setDocumentLocator(ctxt->userData,
9296 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009297 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009298 ctxt->instate = XML_PARSER_EOF;
9299#ifdef DEBUG_PUSH
9300 xmlGenericError(xmlGenericErrorContext,
9301 "PP: entering EOF\n");
9302#endif
9303 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9304 ctxt->sax->endDocument(ctxt->userData);
9305 goto done;
9306 }
9307 if ((cur == '<') && (next == '?')) {
9308 /* PI or XML decl */
9309 if (avail < 5) return(ret);
9310 if ((!terminate) &&
9311 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9312 return(ret);
9313 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9314 ctxt->sax->setDocumentLocator(ctxt->userData,
9315 &xmlDefaultSAXLocator);
9316 if ((ctxt->input->cur[2] == 'x') &&
9317 (ctxt->input->cur[3] == 'm') &&
9318 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009319 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009320 ret += 5;
9321#ifdef DEBUG_PUSH
9322 xmlGenericError(xmlGenericErrorContext,
9323 "PP: Parsing XML Decl\n");
9324#endif
9325 xmlParseXMLDecl(ctxt);
9326 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9327 /*
9328 * The XML REC instructs us to stop parsing right
9329 * here
9330 */
9331 ctxt->instate = XML_PARSER_EOF;
9332 return(0);
9333 }
9334 ctxt->standalone = ctxt->input->standalone;
9335 if ((ctxt->encoding == NULL) &&
9336 (ctxt->input->encoding != NULL))
9337 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9338 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9339 (!ctxt->disableSAX))
9340 ctxt->sax->startDocument(ctxt->userData);
9341 ctxt->instate = XML_PARSER_MISC;
9342#ifdef DEBUG_PUSH
9343 xmlGenericError(xmlGenericErrorContext,
9344 "PP: entering MISC\n");
9345#endif
9346 } else {
9347 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9348 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9349 (!ctxt->disableSAX))
9350 ctxt->sax->startDocument(ctxt->userData);
9351 ctxt->instate = XML_PARSER_MISC;
9352#ifdef DEBUG_PUSH
9353 xmlGenericError(xmlGenericErrorContext,
9354 "PP: entering MISC\n");
9355#endif
9356 }
9357 } else {
9358 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9359 ctxt->sax->setDocumentLocator(ctxt->userData,
9360 &xmlDefaultSAXLocator);
9361 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009362 if (ctxt->version == NULL) {
9363 xmlErrMemory(ctxt, NULL);
9364 break;
9365 }
Owen Taylor3473f882001-02-23 17:55:21 +00009366 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9367 (!ctxt->disableSAX))
9368 ctxt->sax->startDocument(ctxt->userData);
9369 ctxt->instate = XML_PARSER_MISC;
9370#ifdef DEBUG_PUSH
9371 xmlGenericError(xmlGenericErrorContext,
9372 "PP: entering MISC\n");
9373#endif
9374 }
9375 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009376 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009377 const xmlChar *name;
9378 const xmlChar *prefix;
9379 const xmlChar *URI;
9380 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009381
9382 if ((avail < 2) && (ctxt->inputNr == 1))
9383 goto done;
9384 cur = ctxt->input->cur[0];
9385 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009386 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009387 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009388 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9389 ctxt->sax->endDocument(ctxt->userData);
9390 goto done;
9391 }
9392 if (!terminate) {
9393 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009394 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009395 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009396 goto done;
9397 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9398 goto done;
9399 }
9400 }
9401 if (ctxt->spaceNr == 0)
9402 spacePush(ctxt, -1);
9403 else
9404 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009405#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009406 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009407#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009408 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009409#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009410 else
9411 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009412#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009413 if (name == NULL) {
9414 spacePop(ctxt);
9415 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009416 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9417 ctxt->sax->endDocument(ctxt->userData);
9418 goto done;
9419 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009420#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009421 /*
9422 * [ VC: Root Element Type ]
9423 * The Name in the document type declaration must match
9424 * the element type of the root element.
9425 */
9426 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9427 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9428 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009429#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009430
9431 /*
9432 * Check for an Empty Element.
9433 */
9434 if ((RAW == '/') && (NXT(1) == '>')) {
9435 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009436
9437 if (ctxt->sax2) {
9438 if ((ctxt->sax != NULL) &&
9439 (ctxt->sax->endElementNs != NULL) &&
9440 (!ctxt->disableSAX))
9441 ctxt->sax->endElementNs(ctxt->userData, name,
9442 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009443 if (ctxt->nsNr - nsNr > 0)
9444 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009445#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009446 } else {
9447 if ((ctxt->sax != NULL) &&
9448 (ctxt->sax->endElement != NULL) &&
9449 (!ctxt->disableSAX))
9450 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009451#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009452 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009453 spacePop(ctxt);
9454 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009455 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009456 } else {
9457 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009458 }
9459 break;
9460 }
9461 if (RAW == '>') {
9462 NEXT;
9463 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009464 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009465 "Couldn't find end of Start Tag %s\n",
9466 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009467 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009468 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009469 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009470 if (ctxt->sax2)
9471 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009472#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009473 else
9474 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009475#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009476
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009478 break;
9479 }
9480 case XML_PARSER_CONTENT: {
9481 const xmlChar *test;
9482 unsigned int cons;
9483 if ((avail < 2) && (ctxt->inputNr == 1))
9484 goto done;
9485 cur = ctxt->input->cur[0];
9486 next = ctxt->input->cur[1];
9487
9488 test = CUR_PTR;
9489 cons = ctxt->input->consumed;
9490 if ((cur == '<') && (next == '/')) {
9491 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009492 break;
9493 } else if ((cur == '<') && (next == '?')) {
9494 if ((!terminate) &&
9495 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9496 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 xmlParsePI(ctxt);
9498 } else if ((cur == '<') && (next != '!')) {
9499 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009500 break;
9501 } else if ((cur == '<') && (next == '!') &&
9502 (ctxt->input->cur[2] == '-') &&
9503 (ctxt->input->cur[3] == '-')) {
9504 if ((!terminate) &&
9505 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9506 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009507 xmlParseComment(ctxt);
9508 ctxt->instate = XML_PARSER_CONTENT;
9509 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9510 (ctxt->input->cur[2] == '[') &&
9511 (ctxt->input->cur[3] == 'C') &&
9512 (ctxt->input->cur[4] == 'D') &&
9513 (ctxt->input->cur[5] == 'A') &&
9514 (ctxt->input->cur[6] == 'T') &&
9515 (ctxt->input->cur[7] == 'A') &&
9516 (ctxt->input->cur[8] == '[')) {
9517 SKIP(9);
9518 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009519 break;
9520 } else if ((cur == '<') && (next == '!') &&
9521 (avail < 9)) {
9522 goto done;
9523 } else if (cur == '&') {
9524 if ((!terminate) &&
9525 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9526 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009527 xmlParseReference(ctxt);
9528 } else {
9529 /* TODO Avoid the extra copy, handle directly !!! */
9530 /*
9531 * Goal of the following test is:
9532 * - minimize calls to the SAX 'character' callback
9533 * when they are mergeable
9534 * - handle an problem for isBlank when we only parse
9535 * a sequence of blank chars and the next one is
9536 * not available to check against '<' presence.
9537 * - tries to homogenize the differences in SAX
9538 * callbacks between the push and pull versions
9539 * of the parser.
9540 */
9541 if ((ctxt->inputNr == 1) &&
9542 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9543 if (!terminate) {
9544 if (ctxt->progressive) {
9545 if ((lastlt == NULL) ||
9546 (ctxt->input->cur > lastlt))
9547 goto done;
9548 } else if (xmlParseLookupSequence(ctxt,
9549 '<', 0, 0) < 0) {
9550 goto done;
9551 }
9552 }
9553 }
9554 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009555 xmlParseCharData(ctxt, 0);
9556 }
9557 /*
9558 * Pop-up of finished entities.
9559 */
9560 while ((RAW == 0) && (ctxt->inputNr > 1))
9561 xmlPopInput(ctxt);
9562 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009563 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9564 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009565 ctxt->instate = XML_PARSER_EOF;
9566 break;
9567 }
9568 break;
9569 }
9570 case XML_PARSER_END_TAG:
9571 if (avail < 2)
9572 goto done;
9573 if (!terminate) {
9574 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009575 /* > can be found unescaped in attribute values */
9576 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009577 goto done;
9578 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9579 goto done;
9580 }
9581 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009582 if (ctxt->sax2) {
9583 xmlParseEndTag2(ctxt,
9584 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9585 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009586 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009587 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009588 }
9589#ifdef LIBXML_SAX1_ENABLED
9590 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009591 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009592#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009593 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009594 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009595 } else {
9596 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009597 }
9598 break;
9599 case XML_PARSER_CDATA_SECTION: {
9600 /*
9601 * The Push mode need to have the SAX callback for
9602 * cdataBlock merge back contiguous callbacks.
9603 */
9604 int base;
9605
9606 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9607 if (base < 0) {
9608 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9609 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9610 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009611 ctxt->sax->cdataBlock(ctxt->userData,
9612 ctxt->input->cur,
9613 XML_PARSER_BIG_BUFFER_SIZE);
9614 else if (ctxt->sax->characters != NULL)
9615 ctxt->sax->characters(ctxt->userData,
9616 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009617 XML_PARSER_BIG_BUFFER_SIZE);
9618 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009619 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009620 ctxt->checkIndex = 0;
9621 }
9622 goto done;
9623 } else {
9624 if ((ctxt->sax != NULL) && (base > 0) &&
9625 (!ctxt->disableSAX)) {
9626 if (ctxt->sax->cdataBlock != NULL)
9627 ctxt->sax->cdataBlock(ctxt->userData,
9628 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009629 else if (ctxt->sax->characters != NULL)
9630 ctxt->sax->characters(ctxt->userData,
9631 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009632 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009633 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009634 ctxt->checkIndex = 0;
9635 ctxt->instate = XML_PARSER_CONTENT;
9636#ifdef DEBUG_PUSH
9637 xmlGenericError(xmlGenericErrorContext,
9638 "PP: entering CONTENT\n");
9639#endif
9640 }
9641 break;
9642 }
Owen Taylor3473f882001-02-23 17:55:21 +00009643 case XML_PARSER_MISC:
9644 SKIP_BLANKS;
9645 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009646 avail = ctxt->input->length -
9647 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009648 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009649 avail = ctxt->input->buf->buffer->use -
9650 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009651 if (avail < 2)
9652 goto done;
9653 cur = ctxt->input->cur[0];
9654 next = ctxt->input->cur[1];
9655 if ((cur == '<') && (next == '?')) {
9656 if ((!terminate) &&
9657 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9658 goto done;
9659#ifdef DEBUG_PUSH
9660 xmlGenericError(xmlGenericErrorContext,
9661 "PP: Parsing PI\n");
9662#endif
9663 xmlParsePI(ctxt);
9664 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009665 (ctxt->input->cur[2] == '-') &&
9666 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009667 if ((!terminate) &&
9668 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9669 goto done;
9670#ifdef DEBUG_PUSH
9671 xmlGenericError(xmlGenericErrorContext,
9672 "PP: Parsing Comment\n");
9673#endif
9674 xmlParseComment(ctxt);
9675 ctxt->instate = XML_PARSER_MISC;
9676 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009677 (ctxt->input->cur[2] == 'D') &&
9678 (ctxt->input->cur[3] == 'O') &&
9679 (ctxt->input->cur[4] == 'C') &&
9680 (ctxt->input->cur[5] == 'T') &&
9681 (ctxt->input->cur[6] == 'Y') &&
9682 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009683 (ctxt->input->cur[8] == 'E')) {
9684 if ((!terminate) &&
9685 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9686 goto done;
9687#ifdef DEBUG_PUSH
9688 xmlGenericError(xmlGenericErrorContext,
9689 "PP: Parsing internal subset\n");
9690#endif
9691 ctxt->inSubset = 1;
9692 xmlParseDocTypeDecl(ctxt);
9693 if (RAW == '[') {
9694 ctxt->instate = XML_PARSER_DTD;
9695#ifdef DEBUG_PUSH
9696 xmlGenericError(xmlGenericErrorContext,
9697 "PP: entering DTD\n");
9698#endif
9699 } else {
9700 /*
9701 * Create and update the external subset.
9702 */
9703 ctxt->inSubset = 2;
9704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9705 (ctxt->sax->externalSubset != NULL))
9706 ctxt->sax->externalSubset(ctxt->userData,
9707 ctxt->intSubName, ctxt->extSubSystem,
9708 ctxt->extSubURI);
9709 ctxt->inSubset = 0;
9710 ctxt->instate = XML_PARSER_PROLOG;
9711#ifdef DEBUG_PUSH
9712 xmlGenericError(xmlGenericErrorContext,
9713 "PP: entering PROLOG\n");
9714#endif
9715 }
9716 } else if ((cur == '<') && (next == '!') &&
9717 (avail < 9)) {
9718 goto done;
9719 } else {
9720 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009721 ctxt->progressive = 1;
9722 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009723#ifdef DEBUG_PUSH
9724 xmlGenericError(xmlGenericErrorContext,
9725 "PP: entering START_TAG\n");
9726#endif
9727 }
9728 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009729 case XML_PARSER_PROLOG:
9730 SKIP_BLANKS;
9731 if (ctxt->input->buf == NULL)
9732 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9733 else
9734 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9735 if (avail < 2)
9736 goto done;
9737 cur = ctxt->input->cur[0];
9738 next = ctxt->input->cur[1];
9739 if ((cur == '<') && (next == '?')) {
9740 if ((!terminate) &&
9741 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9742 goto done;
9743#ifdef DEBUG_PUSH
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: Parsing PI\n");
9746#endif
9747 xmlParsePI(ctxt);
9748 } else if ((cur == '<') && (next == '!') &&
9749 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9750 if ((!terminate) &&
9751 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9752 goto done;
9753#ifdef DEBUG_PUSH
9754 xmlGenericError(xmlGenericErrorContext,
9755 "PP: Parsing Comment\n");
9756#endif
9757 xmlParseComment(ctxt);
9758 ctxt->instate = XML_PARSER_PROLOG;
9759 } else if ((cur == '<') && (next == '!') &&
9760 (avail < 4)) {
9761 goto done;
9762 } else {
9763 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009764 if (ctxt->progressive == 0)
9765 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009766 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009767#ifdef DEBUG_PUSH
9768 xmlGenericError(xmlGenericErrorContext,
9769 "PP: entering START_TAG\n");
9770#endif
9771 }
9772 break;
9773 case XML_PARSER_EPILOG:
9774 SKIP_BLANKS;
9775 if (ctxt->input->buf == NULL)
9776 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9777 else
9778 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9779 if (avail < 2)
9780 goto done;
9781 cur = ctxt->input->cur[0];
9782 next = ctxt->input->cur[1];
9783 if ((cur == '<') && (next == '?')) {
9784 if ((!terminate) &&
9785 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9786 goto done;
9787#ifdef DEBUG_PUSH
9788 xmlGenericError(xmlGenericErrorContext,
9789 "PP: Parsing PI\n");
9790#endif
9791 xmlParsePI(ctxt);
9792 ctxt->instate = XML_PARSER_EPILOG;
9793 } else if ((cur == '<') && (next == '!') &&
9794 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9795 if ((!terminate) &&
9796 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9797 goto done;
9798#ifdef DEBUG_PUSH
9799 xmlGenericError(xmlGenericErrorContext,
9800 "PP: Parsing Comment\n");
9801#endif
9802 xmlParseComment(ctxt);
9803 ctxt->instate = XML_PARSER_EPILOG;
9804 } else if ((cur == '<') && (next == '!') &&
9805 (avail < 4)) {
9806 goto done;
9807 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009808 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009809 ctxt->instate = XML_PARSER_EOF;
9810#ifdef DEBUG_PUSH
9811 xmlGenericError(xmlGenericErrorContext,
9812 "PP: entering EOF\n");
9813#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009814 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009815 ctxt->sax->endDocument(ctxt->userData);
9816 goto done;
9817 }
9818 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009819 case XML_PARSER_DTD: {
9820 /*
9821 * Sorry but progressive parsing of the internal subset
9822 * is not expected to be supported. We first check that
9823 * the full content of the internal subset is available and
9824 * the parsing is launched only at that point.
9825 * Internal subset ends up with "']' S? '>'" in an unescaped
9826 * section and not in a ']]>' sequence which are conditional
9827 * sections (whoever argued to keep that crap in XML deserve
9828 * a place in hell !).
9829 */
9830 int base, i;
9831 xmlChar *buf;
9832 xmlChar quote = 0;
9833
9834 base = ctxt->input->cur - ctxt->input->base;
9835 if (base < 0) return(0);
9836 if (ctxt->checkIndex > base)
9837 base = ctxt->checkIndex;
9838 buf = ctxt->input->buf->buffer->content;
9839 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9840 base++) {
9841 if (quote != 0) {
9842 if (buf[base] == quote)
9843 quote = 0;
9844 continue;
9845 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009846 if ((quote == 0) && (buf[base] == '<')) {
9847 int found = 0;
9848 /* special handling of comments */
9849 if (((unsigned int) base + 4 <
9850 ctxt->input->buf->buffer->use) &&
9851 (buf[base + 1] == '!') &&
9852 (buf[base + 2] == '-') &&
9853 (buf[base + 3] == '-')) {
9854 for (;(unsigned int) base + 3 <
9855 ctxt->input->buf->buffer->use; base++) {
9856 if ((buf[base] == '-') &&
9857 (buf[base + 1] == '-') &&
9858 (buf[base + 2] == '>')) {
9859 found = 1;
9860 base += 2;
9861 break;
9862 }
9863 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009864 if (!found) {
9865#if 0
9866 fprintf(stderr, "unfinished comment\n");
9867#endif
9868 break; /* for */
9869 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009870 continue;
9871 }
9872 }
Owen Taylor3473f882001-02-23 17:55:21 +00009873 if (buf[base] == '"') {
9874 quote = '"';
9875 continue;
9876 }
9877 if (buf[base] == '\'') {
9878 quote = '\'';
9879 continue;
9880 }
9881 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009882#if 0
9883 fprintf(stderr, "%c%c%c%c: ", buf[base],
9884 buf[base + 1], buf[base + 2], buf[base + 3]);
9885#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009886 if ((unsigned int) base +1 >=
9887 ctxt->input->buf->buffer->use)
9888 break;
9889 if (buf[base + 1] == ']') {
9890 /* conditional crap, skip both ']' ! */
9891 base++;
9892 continue;
9893 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009894 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009895 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9896 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009897 if (buf[base + i] == '>') {
9898#if 0
9899 fprintf(stderr, "found\n");
9900#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009901 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009902 }
9903 if (!IS_BLANK_CH(buf[base + i])) {
9904#if 0
9905 fprintf(stderr, "not found\n");
9906#endif
9907 goto not_end_of_int_subset;
9908 }
Owen Taylor3473f882001-02-23 17:55:21 +00009909 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009910#if 0
9911 fprintf(stderr, "end of stream\n");
9912#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009913 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009914
Owen Taylor3473f882001-02-23 17:55:21 +00009915 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009916not_end_of_int_subset:
9917 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +00009918 }
9919 /*
9920 * We didn't found the end of the Internal subset
9921 */
Owen Taylor3473f882001-02-23 17:55:21 +00009922#ifdef DEBUG_PUSH
9923 if (next == 0)
9924 xmlGenericError(xmlGenericErrorContext,
9925 "PP: lookup of int subset end filed\n");
9926#endif
9927 goto done;
9928
9929found_end_int_subset:
9930 xmlParseInternalSubset(ctxt);
9931 ctxt->inSubset = 2;
9932 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9933 (ctxt->sax->externalSubset != NULL))
9934 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9935 ctxt->extSubSystem, ctxt->extSubURI);
9936 ctxt->inSubset = 0;
9937 ctxt->instate = XML_PARSER_PROLOG;
9938 ctxt->checkIndex = 0;
9939#ifdef DEBUG_PUSH
9940 xmlGenericError(xmlGenericErrorContext,
9941 "PP: entering PROLOG\n");
9942#endif
9943 break;
9944 }
9945 case XML_PARSER_COMMENT:
9946 xmlGenericError(xmlGenericErrorContext,
9947 "PP: internal error, state == COMMENT\n");
9948 ctxt->instate = XML_PARSER_CONTENT;
9949#ifdef DEBUG_PUSH
9950 xmlGenericError(xmlGenericErrorContext,
9951 "PP: entering CONTENT\n");
9952#endif
9953 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009954 case XML_PARSER_IGNORE:
9955 xmlGenericError(xmlGenericErrorContext,
9956 "PP: internal error, state == IGNORE");
9957 ctxt->instate = XML_PARSER_DTD;
9958#ifdef DEBUG_PUSH
9959 xmlGenericError(xmlGenericErrorContext,
9960 "PP: entering DTD\n");
9961#endif
9962 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009963 case XML_PARSER_PI:
9964 xmlGenericError(xmlGenericErrorContext,
9965 "PP: internal error, state == PI\n");
9966 ctxt->instate = XML_PARSER_CONTENT;
9967#ifdef DEBUG_PUSH
9968 xmlGenericError(xmlGenericErrorContext,
9969 "PP: entering CONTENT\n");
9970#endif
9971 break;
9972 case XML_PARSER_ENTITY_DECL:
9973 xmlGenericError(xmlGenericErrorContext,
9974 "PP: internal error, state == ENTITY_DECL\n");
9975 ctxt->instate = XML_PARSER_DTD;
9976#ifdef DEBUG_PUSH
9977 xmlGenericError(xmlGenericErrorContext,
9978 "PP: entering DTD\n");
9979#endif
9980 break;
9981 case XML_PARSER_ENTITY_VALUE:
9982 xmlGenericError(xmlGenericErrorContext,
9983 "PP: internal error, state == ENTITY_VALUE\n");
9984 ctxt->instate = XML_PARSER_CONTENT;
9985#ifdef DEBUG_PUSH
9986 xmlGenericError(xmlGenericErrorContext,
9987 "PP: entering DTD\n");
9988#endif
9989 break;
9990 case XML_PARSER_ATTRIBUTE_VALUE:
9991 xmlGenericError(xmlGenericErrorContext,
9992 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9993 ctxt->instate = XML_PARSER_START_TAG;
9994#ifdef DEBUG_PUSH
9995 xmlGenericError(xmlGenericErrorContext,
9996 "PP: entering START_TAG\n");
9997#endif
9998 break;
9999 case XML_PARSER_SYSTEM_LITERAL:
10000 xmlGenericError(xmlGenericErrorContext,
10001 "PP: internal error, state == SYSTEM_LITERAL\n");
10002 ctxt->instate = XML_PARSER_START_TAG;
10003#ifdef DEBUG_PUSH
10004 xmlGenericError(xmlGenericErrorContext,
10005 "PP: entering START_TAG\n");
10006#endif
10007 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010008 case XML_PARSER_PUBLIC_LITERAL:
10009 xmlGenericError(xmlGenericErrorContext,
10010 "PP: internal error, state == PUBLIC_LITERAL\n");
10011 ctxt->instate = XML_PARSER_START_TAG;
10012#ifdef DEBUG_PUSH
10013 xmlGenericError(xmlGenericErrorContext,
10014 "PP: entering START_TAG\n");
10015#endif
10016 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010017 }
10018 }
10019done:
10020#ifdef DEBUG_PUSH
10021 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10022#endif
10023 return(ret);
10024}
10025
10026/**
Owen Taylor3473f882001-02-23 17:55:21 +000010027 * xmlParseChunk:
10028 * @ctxt: an XML parser context
10029 * @chunk: an char array
10030 * @size: the size in byte of the chunk
10031 * @terminate: last chunk indicator
10032 *
10033 * Parse a Chunk of memory
10034 *
10035 * Returns zero if no error, the xmlParserErrors otherwise.
10036 */
10037int
10038xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10039 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010040 if (ctxt == NULL)
10041 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010042 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010043 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010044 if (ctxt->instate == XML_PARSER_START)
10045 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010046 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10047 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10048 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10049 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010050 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010051
William M. Bracka3215c72004-07-31 16:24:01 +000010052 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10053 if (res < 0) {
10054 ctxt->errNo = XML_PARSER_EOF;
10055 ctxt->disableSAX = 1;
10056 return (XML_PARSER_EOF);
10057 }
Owen Taylor3473f882001-02-23 17:55:21 +000010058 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10059 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010060 ctxt->input->end =
10061 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010062#ifdef DEBUG_PUSH
10063 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10064#endif
10065
Owen Taylor3473f882001-02-23 17:55:21 +000010066 } else if (ctxt->instate != XML_PARSER_EOF) {
10067 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10068 xmlParserInputBufferPtr in = ctxt->input->buf;
10069 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10070 (in->raw != NULL)) {
10071 int nbchars;
10072
10073 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10074 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010075 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010076 xmlGenericError(xmlGenericErrorContext,
10077 "xmlParseChunk: encoder error\n");
10078 return(XML_ERR_INVALID_ENCODING);
10079 }
10080 }
10081 }
10082 }
10083 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010084 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010085 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010086 if (terminate) {
10087 /*
10088 * Check for termination
10089 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010090 int avail = 0;
10091
10092 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010093 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010094 avail = ctxt->input->length -
10095 (ctxt->input->cur - ctxt->input->base);
10096 else
10097 avail = ctxt->input->buf->buffer->use -
10098 (ctxt->input->cur - ctxt->input->base);
10099 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010100
Owen Taylor3473f882001-02-23 17:55:21 +000010101 if ((ctxt->instate != XML_PARSER_EOF) &&
10102 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010103 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010104 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010105 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010106 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010107 }
Owen Taylor3473f882001-02-23 17:55:21 +000010108 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010109 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010110 ctxt->sax->endDocument(ctxt->userData);
10111 }
10112 ctxt->instate = XML_PARSER_EOF;
10113 }
10114 return((xmlParserErrors) ctxt->errNo);
10115}
10116
10117/************************************************************************
10118 * *
10119 * I/O front end functions to the parser *
10120 * *
10121 ************************************************************************/
10122
10123/**
10124 * xmlStopParser:
10125 * @ctxt: an XML parser context
10126 *
10127 * Blocks further parser processing
10128 */
10129void
10130xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +000010131 if (ctxt == NULL)
10132 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010133 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +000010134 ctxt->disableSAX = 1;
William M. Brack230c5502004-12-20 16:18:49 +000010135 if (ctxt->input != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010136 ctxt->input->cur = BAD_CAST"";
William M. Brack230c5502004-12-20 16:18:49 +000010137 ctxt->input->base = ctxt->input->cur;
10138 }
Owen Taylor3473f882001-02-23 17:55:21 +000010139}
10140
10141/**
10142 * xmlCreatePushParserCtxt:
10143 * @sax: a SAX handler
10144 * @user_data: The user data returned on SAX callbacks
10145 * @chunk: a pointer to an array of chars
10146 * @size: number of chars in the array
10147 * @filename: an optional file name or URI
10148 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010149 * Create a parser context for using the XML parser in push mode.
10150 * If @buffer and @size are non-NULL, the data is used to detect
10151 * the encoding. The remaining characters will be parsed so they
10152 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010153 * To allow content encoding detection, @size should be >= 4
10154 * The value of @filename is used for fetching external entities
10155 * and error/warning reports.
10156 *
10157 * Returns the new parser context or NULL
10158 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010159
Owen Taylor3473f882001-02-23 17:55:21 +000010160xmlParserCtxtPtr
10161xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10162 const char *chunk, int size, const char *filename) {
10163 xmlParserCtxtPtr ctxt;
10164 xmlParserInputPtr inputStream;
10165 xmlParserInputBufferPtr buf;
10166 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10167
10168 /*
10169 * plug some encoding conversion routines
10170 */
10171 if ((chunk != NULL) && (size >= 4))
10172 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10173
10174 buf = xmlAllocParserInputBuffer(enc);
10175 if (buf == NULL) return(NULL);
10176
10177 ctxt = xmlNewParserCtxt();
10178 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010179 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010180 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010181 return(NULL);
10182 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010183 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010184 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10185 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010186 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010187 xmlFreeParserInputBuffer(buf);
10188 xmlFreeParserCtxt(ctxt);
10189 return(NULL);
10190 }
Owen Taylor3473f882001-02-23 17:55:21 +000010191 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010192#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010193 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010194#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010195 xmlFree(ctxt->sax);
10196 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10197 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010198 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010199 xmlFreeParserInputBuffer(buf);
10200 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010201 return(NULL);
10202 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010203 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10204 if (sax->initialized == XML_SAX2_MAGIC)
10205 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10206 else
10207 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010208 if (user_data != NULL)
10209 ctxt->userData = user_data;
10210 }
10211 if (filename == NULL) {
10212 ctxt->directory = NULL;
10213 } else {
10214 ctxt->directory = xmlParserGetDirectory(filename);
10215 }
10216
10217 inputStream = xmlNewInputStream(ctxt);
10218 if (inputStream == NULL) {
10219 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010220 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010221 return(NULL);
10222 }
10223
10224 if (filename == NULL)
10225 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010226 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010227 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010228 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010229 if (inputStream->filename == NULL) {
10230 xmlFreeParserCtxt(ctxt);
10231 xmlFreeParserInputBuffer(buf);
10232 return(NULL);
10233 }
10234 }
Owen Taylor3473f882001-02-23 17:55:21 +000010235 inputStream->buf = buf;
10236 inputStream->base = inputStream->buf->buffer->content;
10237 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010238 inputStream->end =
10239 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010240
10241 inputPush(ctxt, inputStream);
10242
10243 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10244 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010245 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10246 int cur = ctxt->input->cur - ctxt->input->base;
10247
Owen Taylor3473f882001-02-23 17:55:21 +000010248 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010249
10250 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10251 ctxt->input->cur = ctxt->input->base + cur;
10252 ctxt->input->end =
10253 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010254#ifdef DEBUG_PUSH
10255 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10256#endif
10257 }
10258
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010259 if (enc != XML_CHAR_ENCODING_NONE) {
10260 xmlSwitchEncoding(ctxt, enc);
10261 }
10262
Owen Taylor3473f882001-02-23 17:55:21 +000010263 return(ctxt);
10264}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010265#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010266
10267/**
10268 * xmlCreateIOParserCtxt:
10269 * @sax: a SAX handler
10270 * @user_data: The user data returned on SAX callbacks
10271 * @ioread: an I/O read function
10272 * @ioclose: an I/O close function
10273 * @ioctx: an I/O handler
10274 * @enc: the charset encoding if known
10275 *
10276 * Create a parser context for using the XML parser with an existing
10277 * I/O stream
10278 *
10279 * Returns the new parser context or NULL
10280 */
10281xmlParserCtxtPtr
10282xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10283 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10284 void *ioctx, xmlCharEncoding enc) {
10285 xmlParserCtxtPtr ctxt;
10286 xmlParserInputPtr inputStream;
10287 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010288
10289 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010290
10291 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10292 if (buf == NULL) return(NULL);
10293
10294 ctxt = xmlNewParserCtxt();
10295 if (ctxt == NULL) {
10296 xmlFree(buf);
10297 return(NULL);
10298 }
10299 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010300#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010301 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010302#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010303 xmlFree(ctxt->sax);
10304 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10305 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010306 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010307 xmlFree(ctxt);
10308 return(NULL);
10309 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010310 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10311 if (sax->initialized == XML_SAX2_MAGIC)
10312 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10313 else
10314 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010315 if (user_data != NULL)
10316 ctxt->userData = user_data;
10317 }
10318
10319 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10320 if (inputStream == NULL) {
10321 xmlFreeParserCtxt(ctxt);
10322 return(NULL);
10323 }
10324 inputPush(ctxt, inputStream);
10325
10326 return(ctxt);
10327}
10328
Daniel Veillard4432df22003-09-28 18:58:27 +000010329#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010330/************************************************************************
10331 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010332 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010333 * *
10334 ************************************************************************/
10335
10336/**
10337 * xmlIOParseDTD:
10338 * @sax: the SAX handler block or NULL
10339 * @input: an Input Buffer
10340 * @enc: the charset encoding if known
10341 *
10342 * Load and parse a DTD
10343 *
10344 * Returns the resulting xmlDtdPtr or NULL in case of error.
10345 * @input will be freed at parsing end.
10346 */
10347
10348xmlDtdPtr
10349xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10350 xmlCharEncoding enc) {
10351 xmlDtdPtr ret = NULL;
10352 xmlParserCtxtPtr ctxt;
10353 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010354 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010355
10356 if (input == NULL)
10357 return(NULL);
10358
10359 ctxt = xmlNewParserCtxt();
10360 if (ctxt == NULL) {
10361 return(NULL);
10362 }
10363
10364 /*
10365 * Set-up the SAX context
10366 */
10367 if (sax != NULL) {
10368 if (ctxt->sax != NULL)
10369 xmlFree(ctxt->sax);
10370 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010371 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010372 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010373 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010374
10375 /*
10376 * generate a parser input from the I/O handler
10377 */
10378
Daniel Veillard43caefb2003-12-07 19:32:22 +000010379 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010380 if (pinput == NULL) {
10381 if (sax != NULL) ctxt->sax = NULL;
10382 xmlFreeParserCtxt(ctxt);
10383 return(NULL);
10384 }
10385
10386 /*
10387 * plug some encoding conversion routines here.
10388 */
10389 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010390 if (enc != XML_CHAR_ENCODING_NONE) {
10391 xmlSwitchEncoding(ctxt, enc);
10392 }
Owen Taylor3473f882001-02-23 17:55:21 +000010393
10394 pinput->filename = NULL;
10395 pinput->line = 1;
10396 pinput->col = 1;
10397 pinput->base = ctxt->input->cur;
10398 pinput->cur = ctxt->input->cur;
10399 pinput->free = NULL;
10400
10401 /*
10402 * let's parse that entity knowing it's an external subset.
10403 */
10404 ctxt->inSubset = 2;
10405 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10406 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10407 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010408
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010409 if ((enc == XML_CHAR_ENCODING_NONE) &&
10410 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010411 /*
10412 * Get the 4 first bytes and decode the charset
10413 * if enc != XML_CHAR_ENCODING_NONE
10414 * plug some encoding conversion routines.
10415 */
10416 start[0] = RAW;
10417 start[1] = NXT(1);
10418 start[2] = NXT(2);
10419 start[3] = NXT(3);
10420 enc = xmlDetectCharEncoding(start, 4);
10421 if (enc != XML_CHAR_ENCODING_NONE) {
10422 xmlSwitchEncoding(ctxt, enc);
10423 }
10424 }
10425
Owen Taylor3473f882001-02-23 17:55:21 +000010426 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10427
10428 if (ctxt->myDoc != NULL) {
10429 if (ctxt->wellFormed) {
10430 ret = ctxt->myDoc->extSubset;
10431 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010432 if (ret != NULL) {
10433 xmlNodePtr tmp;
10434
10435 ret->doc = NULL;
10436 tmp = ret->children;
10437 while (tmp != NULL) {
10438 tmp->doc = NULL;
10439 tmp = tmp->next;
10440 }
10441 }
Owen Taylor3473f882001-02-23 17:55:21 +000010442 } else {
10443 ret = NULL;
10444 }
10445 xmlFreeDoc(ctxt->myDoc);
10446 ctxt->myDoc = NULL;
10447 }
10448 if (sax != NULL) ctxt->sax = NULL;
10449 xmlFreeParserCtxt(ctxt);
10450
10451 return(ret);
10452}
10453
10454/**
10455 * xmlSAXParseDTD:
10456 * @sax: the SAX handler block
10457 * @ExternalID: a NAME* containing the External ID of the DTD
10458 * @SystemID: a NAME* containing the URL to the DTD
10459 *
10460 * Load and parse an external subset.
10461 *
10462 * Returns the resulting xmlDtdPtr or NULL in case of error.
10463 */
10464
10465xmlDtdPtr
10466xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10467 const xmlChar *SystemID) {
10468 xmlDtdPtr ret = NULL;
10469 xmlParserCtxtPtr ctxt;
10470 xmlParserInputPtr input = NULL;
10471 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010472 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010473
10474 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10475
10476 ctxt = xmlNewParserCtxt();
10477 if (ctxt == NULL) {
10478 return(NULL);
10479 }
10480
10481 /*
10482 * Set-up the SAX context
10483 */
10484 if (sax != NULL) {
10485 if (ctxt->sax != NULL)
10486 xmlFree(ctxt->sax);
10487 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010488 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010489 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010490
10491 /*
10492 * Canonicalise the system ID
10493 */
10494 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010495 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010496 xmlFreeParserCtxt(ctxt);
10497 return(NULL);
10498 }
Owen Taylor3473f882001-02-23 17:55:21 +000010499
10500 /*
10501 * Ask the Entity resolver to load the damn thing
10502 */
10503
10504 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010505 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 if (input == NULL) {
10507 if (sax != NULL) ctxt->sax = NULL;
10508 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010509 if (systemIdCanonic != NULL)
10510 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010511 return(NULL);
10512 }
10513
10514 /*
10515 * plug some encoding conversion routines here.
10516 */
10517 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010518 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10519 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10520 xmlSwitchEncoding(ctxt, enc);
10521 }
Owen Taylor3473f882001-02-23 17:55:21 +000010522
10523 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010524 input->filename = (char *) systemIdCanonic;
10525 else
10526 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010527 input->line = 1;
10528 input->col = 1;
10529 input->base = ctxt->input->cur;
10530 input->cur = ctxt->input->cur;
10531 input->free = NULL;
10532
10533 /*
10534 * let's parse that entity knowing it's an external subset.
10535 */
10536 ctxt->inSubset = 2;
10537 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10538 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10539 ExternalID, SystemID);
10540 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10541
10542 if (ctxt->myDoc != NULL) {
10543 if (ctxt->wellFormed) {
10544 ret = ctxt->myDoc->extSubset;
10545 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010546 if (ret != NULL) {
10547 xmlNodePtr tmp;
10548
10549 ret->doc = NULL;
10550 tmp = ret->children;
10551 while (tmp != NULL) {
10552 tmp->doc = NULL;
10553 tmp = tmp->next;
10554 }
10555 }
Owen Taylor3473f882001-02-23 17:55:21 +000010556 } else {
10557 ret = NULL;
10558 }
10559 xmlFreeDoc(ctxt->myDoc);
10560 ctxt->myDoc = NULL;
10561 }
10562 if (sax != NULL) ctxt->sax = NULL;
10563 xmlFreeParserCtxt(ctxt);
10564
10565 return(ret);
10566}
10567
Daniel Veillard4432df22003-09-28 18:58:27 +000010568
Owen Taylor3473f882001-02-23 17:55:21 +000010569/**
10570 * xmlParseDTD:
10571 * @ExternalID: a NAME* containing the External ID of the DTD
10572 * @SystemID: a NAME* containing the URL to the DTD
10573 *
10574 * Load and parse an external subset.
10575 *
10576 * Returns the resulting xmlDtdPtr or NULL in case of error.
10577 */
10578
10579xmlDtdPtr
10580xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10581 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10582}
Daniel Veillard4432df22003-09-28 18:58:27 +000010583#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010584
10585/************************************************************************
10586 * *
10587 * Front ends when parsing an Entity *
10588 * *
10589 ************************************************************************/
10590
10591/**
Owen Taylor3473f882001-02-23 17:55:21 +000010592 * xmlParseCtxtExternalEntity:
10593 * @ctx: the existing parsing context
10594 * @URL: the URL for the entity to load
10595 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010596 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010597 *
10598 * Parse an external general entity within an existing parsing context
10599 * An external general parsed entity is well-formed if it matches the
10600 * production labeled extParsedEnt.
10601 *
10602 * [78] extParsedEnt ::= TextDecl? content
10603 *
10604 * Returns 0 if the entity is well formed, -1 in case of args problem and
10605 * the parser error code otherwise
10606 */
10607
10608int
10609xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010610 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010611 xmlParserCtxtPtr ctxt;
10612 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010613 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010614 xmlSAXHandlerPtr oldsax = NULL;
10615 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010616 xmlChar start[4];
10617 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010618
Daniel Veillardce682bc2004-11-05 17:22:25 +000010619 if (ctx == NULL) return(-1);
10620
Owen Taylor3473f882001-02-23 17:55:21 +000010621 if (ctx->depth > 40) {
10622 return(XML_ERR_ENTITY_LOOP);
10623 }
10624
Daniel Veillardcda96922001-08-21 10:56:31 +000010625 if (lst != NULL)
10626 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010627 if ((URL == NULL) && (ID == NULL))
10628 return(-1);
10629 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10630 return(-1);
10631
10632
10633 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10634 if (ctxt == NULL) return(-1);
10635 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010636 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010637 oldsax = ctxt->sax;
10638 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010639 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010640 newDoc = xmlNewDoc(BAD_CAST "1.0");
10641 if (newDoc == NULL) {
10642 xmlFreeParserCtxt(ctxt);
10643 return(-1);
10644 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010645 if (ctx->myDoc->dict) {
10646 newDoc->dict = ctx->myDoc->dict;
10647 xmlDictReference(newDoc->dict);
10648 }
Owen Taylor3473f882001-02-23 17:55:21 +000010649 if (ctx->myDoc != NULL) {
10650 newDoc->intSubset = ctx->myDoc->intSubset;
10651 newDoc->extSubset = ctx->myDoc->extSubset;
10652 }
10653 if (ctx->myDoc->URL != NULL) {
10654 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10655 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010656 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10657 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010658 ctxt->sax = oldsax;
10659 xmlFreeParserCtxt(ctxt);
10660 newDoc->intSubset = NULL;
10661 newDoc->extSubset = NULL;
10662 xmlFreeDoc(newDoc);
10663 return(-1);
10664 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010665 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010666 nodePush(ctxt, newDoc->children);
10667 if (ctx->myDoc == NULL) {
10668 ctxt->myDoc = newDoc;
10669 } else {
10670 ctxt->myDoc = ctx->myDoc;
10671 newDoc->children->doc = ctx->myDoc;
10672 }
10673
Daniel Veillard87a764e2001-06-20 17:41:10 +000010674 /*
10675 * Get the 4 first bytes and decode the charset
10676 * if enc != XML_CHAR_ENCODING_NONE
10677 * plug some encoding conversion routines.
10678 */
10679 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010680 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10681 start[0] = RAW;
10682 start[1] = NXT(1);
10683 start[2] = NXT(2);
10684 start[3] = NXT(3);
10685 enc = xmlDetectCharEncoding(start, 4);
10686 if (enc != XML_CHAR_ENCODING_NONE) {
10687 xmlSwitchEncoding(ctxt, enc);
10688 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010689 }
10690
Owen Taylor3473f882001-02-23 17:55:21 +000010691 /*
10692 * Parse a possible text declaration first
10693 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010694 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010695 xmlParseTextDecl(ctxt);
10696 }
10697
10698 /*
10699 * Doing validity checking on chunk doesn't make sense
10700 */
10701 ctxt->instate = XML_PARSER_CONTENT;
10702 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010703 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010704 ctxt->loadsubset = ctx->loadsubset;
10705 ctxt->depth = ctx->depth + 1;
10706 ctxt->replaceEntities = ctx->replaceEntities;
10707 if (ctxt->validate) {
10708 ctxt->vctxt.error = ctx->vctxt.error;
10709 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010710 } else {
10711 ctxt->vctxt.error = NULL;
10712 ctxt->vctxt.warning = NULL;
10713 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010714 ctxt->vctxt.nodeTab = NULL;
10715 ctxt->vctxt.nodeNr = 0;
10716 ctxt->vctxt.nodeMax = 0;
10717 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010718 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10719 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010720 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10721 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10722 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010723 ctxt->dictNames = ctx->dictNames;
10724 ctxt->attsDefault = ctx->attsDefault;
10725 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010726 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010727
10728 xmlParseContent(ctxt);
10729
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010730 ctx->validate = ctxt->validate;
10731 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010732 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010733 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010734 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010735 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010736 }
10737 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010738 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010739 }
10740
10741 if (!ctxt->wellFormed) {
10742 if (ctxt->errNo == 0)
10743 ret = 1;
10744 else
10745 ret = ctxt->errNo;
10746 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010747 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010748 xmlNodePtr cur;
10749
10750 /*
10751 * Return the newly created nodeset after unlinking it from
10752 * they pseudo parent.
10753 */
10754 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010755 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010756 while (cur != NULL) {
10757 cur->parent = NULL;
10758 cur = cur->next;
10759 }
10760 newDoc->children->children = NULL;
10761 }
10762 ret = 0;
10763 }
10764 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010765 ctxt->dict = NULL;
10766 ctxt->attsDefault = NULL;
10767 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010768 xmlFreeParserCtxt(ctxt);
10769 newDoc->intSubset = NULL;
10770 newDoc->extSubset = NULL;
10771 xmlFreeDoc(newDoc);
10772
10773 return(ret);
10774}
10775
10776/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010777 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010778 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010779 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010780 * @sax: the SAX handler bloc (possibly NULL)
10781 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10782 * @depth: Used for loop detection, use 0
10783 * @URL: the URL for the entity to load
10784 * @ID: the System ID for the entity to load
10785 * @list: the return value for the set of parsed nodes
10786 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010787 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010788 *
10789 * Returns 0 if the entity is well formed, -1 in case of args problem and
10790 * the parser error code otherwise
10791 */
10792
Daniel Veillard7d515752003-09-26 19:12:37 +000010793static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010794xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10795 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010796 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010797 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010798 xmlParserCtxtPtr ctxt;
10799 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010800 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010801 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010802 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010803 xmlChar start[4];
10804 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010805
10806 if (depth > 40) {
10807 return(XML_ERR_ENTITY_LOOP);
10808 }
10809
10810
10811
10812 if (list != NULL)
10813 *list = NULL;
10814 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010815 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010816 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010817 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010818
10819
10820 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010821 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010822 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010823 if (oldctxt != NULL) {
10824 ctxt->_private = oldctxt->_private;
10825 ctxt->loadsubset = oldctxt->loadsubset;
10826 ctxt->validate = oldctxt->validate;
10827 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010828 ctxt->record_info = oldctxt->record_info;
10829 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10830 ctxt->node_seq.length = oldctxt->node_seq.length;
10831 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010832 } else {
10833 /*
10834 * Doing validity checking on chunk without context
10835 * doesn't make sense
10836 */
10837 ctxt->_private = NULL;
10838 ctxt->validate = 0;
10839 ctxt->external = 2;
10840 ctxt->loadsubset = 0;
10841 }
Owen Taylor3473f882001-02-23 17:55:21 +000010842 if (sax != NULL) {
10843 oldsax = ctxt->sax;
10844 ctxt->sax = sax;
10845 if (user_data != NULL)
10846 ctxt->userData = user_data;
10847 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010848 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010849 newDoc = xmlNewDoc(BAD_CAST "1.0");
10850 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010851 ctxt->node_seq.maximum = 0;
10852 ctxt->node_seq.length = 0;
10853 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010854 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010855 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010856 }
10857 if (doc != NULL) {
10858 newDoc->intSubset = doc->intSubset;
10859 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010860 newDoc->dict = doc->dict;
10861 } else if (oldctxt != NULL) {
10862 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010863 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010864 xmlDictReference(newDoc->dict);
10865
Owen Taylor3473f882001-02-23 17:55:21 +000010866 if (doc->URL != NULL) {
10867 newDoc->URL = xmlStrdup(doc->URL);
10868 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010869 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10870 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010871 if (sax != NULL)
10872 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010873 ctxt->node_seq.maximum = 0;
10874 ctxt->node_seq.length = 0;
10875 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010876 xmlFreeParserCtxt(ctxt);
10877 newDoc->intSubset = NULL;
10878 newDoc->extSubset = NULL;
10879 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010880 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010881 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010882 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010883 nodePush(ctxt, newDoc->children);
10884 if (doc == NULL) {
10885 ctxt->myDoc = newDoc;
10886 } else {
10887 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010888 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010889 }
10890
Daniel Veillard87a764e2001-06-20 17:41:10 +000010891 /*
10892 * Get the 4 first bytes and decode the charset
10893 * if enc != XML_CHAR_ENCODING_NONE
10894 * plug some encoding conversion routines.
10895 */
10896 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010897 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10898 start[0] = RAW;
10899 start[1] = NXT(1);
10900 start[2] = NXT(2);
10901 start[3] = NXT(3);
10902 enc = xmlDetectCharEncoding(start, 4);
10903 if (enc != XML_CHAR_ENCODING_NONE) {
10904 xmlSwitchEncoding(ctxt, enc);
10905 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010906 }
10907
Owen Taylor3473f882001-02-23 17:55:21 +000010908 /*
10909 * Parse a possible text declaration first
10910 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010911 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010912 xmlParseTextDecl(ctxt);
10913 }
10914
Owen Taylor3473f882001-02-23 17:55:21 +000010915 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010916 ctxt->depth = depth;
10917
10918 xmlParseContent(ctxt);
10919
Daniel Veillard561b7f82002-03-20 21:55:57 +000010920 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010921 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010922 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010923 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010924 }
10925 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010927 }
10928
10929 if (!ctxt->wellFormed) {
10930 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010931 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010932 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010933 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010934 } else {
10935 if (list != NULL) {
10936 xmlNodePtr cur;
10937
10938 /*
10939 * Return the newly created nodeset after unlinking it from
10940 * they pseudo parent.
10941 */
10942 cur = newDoc->children->children;
10943 *list = cur;
10944 while (cur != NULL) {
10945 cur->parent = NULL;
10946 cur = cur->next;
10947 }
10948 newDoc->children->children = NULL;
10949 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010950 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010951 }
10952 if (sax != NULL)
10953 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010954 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10955 oldctxt->node_seq.length = ctxt->node_seq.length;
10956 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010957 ctxt->node_seq.maximum = 0;
10958 ctxt->node_seq.length = 0;
10959 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010960 xmlFreeParserCtxt(ctxt);
10961 newDoc->intSubset = NULL;
10962 newDoc->extSubset = NULL;
10963 xmlFreeDoc(newDoc);
10964
10965 return(ret);
10966}
10967
Daniel Veillard81273902003-09-30 00:43:48 +000010968#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010969/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010970 * xmlParseExternalEntity:
10971 * @doc: the document the chunk pertains to
10972 * @sax: the SAX handler bloc (possibly NULL)
10973 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10974 * @depth: Used for loop detection, use 0
10975 * @URL: the URL for the entity to load
10976 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010977 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010978 *
10979 * Parse an external general entity
10980 * An external general parsed entity is well-formed if it matches the
10981 * production labeled extParsedEnt.
10982 *
10983 * [78] extParsedEnt ::= TextDecl? content
10984 *
10985 * Returns 0 if the entity is well formed, -1 in case of args problem and
10986 * the parser error code otherwise
10987 */
10988
10989int
10990xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010991 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010992 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010993 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010994}
10995
10996/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010997 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010998 * @doc: the document the chunk pertains to
10999 * @sax: the SAX handler bloc (possibly NULL)
11000 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11001 * @depth: Used for loop detection, use 0
11002 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011003 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011004 *
11005 * Parse a well-balanced chunk of an XML document
11006 * called by the parser
11007 * The allowed sequence for the Well Balanced Chunk is the one defined by
11008 * the content production in the XML grammar:
11009 *
11010 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11011 *
11012 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11013 * the parser error code otherwise
11014 */
11015
11016int
11017xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011018 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011019 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11020 depth, string, lst, 0 );
11021}
Daniel Veillard81273902003-09-30 00:43:48 +000011022#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011023
11024/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011025 * xmlParseBalancedChunkMemoryInternal:
11026 * @oldctxt: the existing parsing context
11027 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11028 * @user_data: the user data field for the parser context
11029 * @lst: the return value for the set of parsed nodes
11030 *
11031 *
11032 * Parse a well-balanced chunk of an XML document
11033 * called by the parser
11034 * The allowed sequence for the Well Balanced Chunk is the one defined by
11035 * the content production in the XML grammar:
11036 *
11037 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11038 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011039 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11040 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011041 *
11042 * In case recover is set to 1, the nodelist will not be empty even if
11043 * the parsed chunk is not well balanced.
11044 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011045static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011046xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11047 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11048 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011049 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011050 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011051 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011052 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011053 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011054 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011055 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011056
11057 if (oldctxt->depth > 40) {
11058 return(XML_ERR_ENTITY_LOOP);
11059 }
11060
11061
11062 if (lst != NULL)
11063 *lst = NULL;
11064 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011065 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011066
11067 size = xmlStrlen(string);
11068
11069 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011070 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011071 if (user_data != NULL)
11072 ctxt->userData = user_data;
11073 else
11074 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011075 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11076 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011077 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11078 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11079 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011080
11081 oldsax = ctxt->sax;
11082 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011083 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011084 ctxt->replaceEntities = oldctxt->replaceEntities;
11085 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011086
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011087 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011088 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011089 newDoc = xmlNewDoc(BAD_CAST "1.0");
11090 if (newDoc == NULL) {
11091 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011092 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011093 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011094 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011095 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011096 newDoc->dict = ctxt->dict;
11097 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011098 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011099 } else {
11100 ctxt->myDoc = oldctxt->myDoc;
11101 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011102 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011103 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011104 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11105 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011106 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011107 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011108 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011109 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011110 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011111 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011112 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011113 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011114 ctxt->myDoc->children = NULL;
11115 ctxt->myDoc->last = NULL;
11116 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011117 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011118 ctxt->instate = XML_PARSER_CONTENT;
11119 ctxt->depth = oldctxt->depth + 1;
11120
Daniel Veillard328f48c2002-11-15 15:24:34 +000011121 ctxt->validate = 0;
11122 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011123 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11124 /*
11125 * ID/IDREF registration will be done in xmlValidateElement below
11126 */
11127 ctxt->loadsubset |= XML_SKIP_IDS;
11128 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011129 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011130 ctxt->attsDefault = oldctxt->attsDefault;
11131 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011132
Daniel Veillard68e9e742002-11-16 15:35:11 +000011133 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011134 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011135 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011136 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011137 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011138 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011139 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011140 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011141 }
11142
11143 if (!ctxt->wellFormed) {
11144 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011145 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011146 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011147 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011148 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011149 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011150 }
11151
William M. Brack7b9154b2003-09-27 19:23:50 +000011152 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011153 xmlNodePtr cur;
11154
11155 /*
11156 * Return the newly created nodeset after unlinking it from
11157 * they pseudo parent.
11158 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011159 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011160 *lst = cur;
11161 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011162#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011163 if (oldctxt->validate && oldctxt->wellFormed &&
11164 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11165 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11166 oldctxt->myDoc, cur);
11167 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011168#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011169 cur->parent = NULL;
11170 cur = cur->next;
11171 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011172 ctxt->myDoc->children->children = NULL;
11173 }
11174 if (ctxt->myDoc != NULL) {
11175 xmlFreeNode(ctxt->myDoc->children);
11176 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011177 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011178 }
11179
11180 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011181 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011182 ctxt->attsDefault = NULL;
11183 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011184 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011185 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011186 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011187 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011188
11189 return(ret);
11190}
11191
Daniel Veillard29b17482004-08-16 00:39:03 +000011192/**
11193 * xmlParseInNodeContext:
11194 * @node: the context node
11195 * @data: the input string
11196 * @datalen: the input string length in bytes
11197 * @options: a combination of xmlParserOption
11198 * @lst: the return value for the set of parsed nodes
11199 *
11200 * Parse a well-balanced chunk of an XML document
11201 * within the context (DTD, namespaces, etc ...) of the given node.
11202 *
11203 * The allowed sequence for the data is a Well Balanced Chunk defined by
11204 * the content production in the XML grammar:
11205 *
11206 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11207 *
11208 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11209 * error code otherwise
11210 */
11211xmlParserErrors
11212xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11213 int options, xmlNodePtr *lst) {
11214#ifdef SAX2
11215 xmlParserCtxtPtr ctxt;
11216 xmlDocPtr doc = NULL;
11217 xmlNodePtr fake, cur;
11218 int nsnr = 0;
11219
11220 xmlParserErrors ret = XML_ERR_OK;
11221
11222 /*
11223 * check all input parameters, grab the document
11224 */
11225 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11226 return(XML_ERR_INTERNAL_ERROR);
11227 switch (node->type) {
11228 case XML_ELEMENT_NODE:
11229 case XML_ATTRIBUTE_NODE:
11230 case XML_TEXT_NODE:
11231 case XML_CDATA_SECTION_NODE:
11232 case XML_ENTITY_REF_NODE:
11233 case XML_PI_NODE:
11234 case XML_COMMENT_NODE:
11235 case XML_DOCUMENT_NODE:
11236 case XML_HTML_DOCUMENT_NODE:
11237 break;
11238 default:
11239 return(XML_ERR_INTERNAL_ERROR);
11240
11241 }
11242 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11243 (node->type != XML_DOCUMENT_NODE) &&
11244 (node->type != XML_HTML_DOCUMENT_NODE))
11245 node = node->parent;
11246 if (node == NULL)
11247 return(XML_ERR_INTERNAL_ERROR);
11248 if (node->type == XML_ELEMENT_NODE)
11249 doc = node->doc;
11250 else
11251 doc = (xmlDocPtr) node;
11252 if (doc == NULL)
11253 return(XML_ERR_INTERNAL_ERROR);
11254
11255 /*
11256 * allocate a context and set-up everything not related to the
11257 * node position in the tree
11258 */
11259 if (doc->type == XML_DOCUMENT_NODE)
11260 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11261#ifdef LIBXML_HTML_ENABLED
11262 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11263 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11264#endif
11265 else
11266 return(XML_ERR_INTERNAL_ERROR);
11267
11268 if (ctxt == NULL)
11269 return(XML_ERR_NO_MEMORY);
11270 fake = xmlNewComment(NULL);
11271 if (fake == NULL) {
11272 xmlFreeParserCtxt(ctxt);
11273 return(XML_ERR_NO_MEMORY);
11274 }
11275 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011276
11277 /*
11278 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11279 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11280 * we must wait until the last moment to free the original one.
11281 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011282 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011283 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011284 xmlDictFree(ctxt->dict);
11285 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011286 } else
11287 options |= XML_PARSE_NODICT;
11288
11289 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011290 xmlDetectSAX2(ctxt);
11291 ctxt->myDoc = doc;
11292
11293 if (node->type == XML_ELEMENT_NODE) {
11294 nodePush(ctxt, node);
11295 /*
11296 * initialize the SAX2 namespaces stack
11297 */
11298 cur = node;
11299 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11300 xmlNsPtr ns = cur->nsDef;
11301 const xmlChar *iprefix, *ihref;
11302
11303 while (ns != NULL) {
11304 if (ctxt->dict) {
11305 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11306 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11307 } else {
11308 iprefix = ns->prefix;
11309 ihref = ns->href;
11310 }
11311
11312 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11313 nsPush(ctxt, iprefix, ihref);
11314 nsnr++;
11315 }
11316 ns = ns->next;
11317 }
11318 cur = cur->parent;
11319 }
11320 ctxt->instate = XML_PARSER_CONTENT;
11321 }
11322
11323 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11324 /*
11325 * ID/IDREF registration will be done in xmlValidateElement below
11326 */
11327 ctxt->loadsubset |= XML_SKIP_IDS;
11328 }
11329
11330 xmlParseContent(ctxt);
11331 nsPop(ctxt, nsnr);
11332 if ((RAW == '<') && (NXT(1) == '/')) {
11333 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11334 } else if (RAW != 0) {
11335 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11336 }
11337 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11338 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11339 ctxt->wellFormed = 0;
11340 }
11341
11342 if (!ctxt->wellFormed) {
11343 if (ctxt->errNo == 0)
11344 ret = XML_ERR_INTERNAL_ERROR;
11345 else
11346 ret = (xmlParserErrors)ctxt->errNo;
11347 } else {
11348 ret = XML_ERR_OK;
11349 }
11350
11351 /*
11352 * Return the newly created nodeset after unlinking it from
11353 * the pseudo sibling.
11354 */
11355
11356 cur = fake->next;
11357 fake->next = NULL;
11358 node->last = fake;
11359
11360 if (cur != NULL) {
11361 cur->prev = NULL;
11362 }
11363
11364 *lst = cur;
11365
11366 while (cur != NULL) {
11367 cur->parent = NULL;
11368 cur = cur->next;
11369 }
11370
11371 xmlUnlinkNode(fake);
11372 xmlFreeNode(fake);
11373
11374
11375 if (ret != XML_ERR_OK) {
11376 xmlFreeNodeList(*lst);
11377 *lst = NULL;
11378 }
William M. Brackc3f81342004-10-03 01:22:44 +000011379
William M. Brackb7b54de2004-10-06 16:38:01 +000011380 if (doc->dict != NULL)
11381 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011382 xmlFreeParserCtxt(ctxt);
11383
11384 return(ret);
11385#else /* !SAX2 */
11386 return(XML_ERR_INTERNAL_ERROR);
11387#endif
11388}
11389
Daniel Veillard81273902003-09-30 00:43:48 +000011390#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011391/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011392 * xmlParseBalancedChunkMemoryRecover:
11393 * @doc: the document the chunk pertains to
11394 * @sax: the SAX handler bloc (possibly NULL)
11395 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11396 * @depth: Used for loop detection, use 0
11397 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11398 * @lst: the return value for the set of parsed nodes
11399 * @recover: return nodes even if the data is broken (use 0)
11400 *
11401 *
11402 * Parse a well-balanced chunk of an XML document
11403 * called by the parser
11404 * The allowed sequence for the Well Balanced Chunk is the one defined by
11405 * the content production in the XML grammar:
11406 *
11407 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11408 *
11409 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11410 * the parser error code otherwise
11411 *
11412 * In case recover is set to 1, the nodelist will not be empty even if
11413 * the parsed chunk is not well balanced.
11414 */
11415int
11416xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11417 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11418 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011419 xmlParserCtxtPtr ctxt;
11420 xmlDocPtr newDoc;
11421 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011422 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011423 int size;
11424 int ret = 0;
11425
11426 if (depth > 40) {
11427 return(XML_ERR_ENTITY_LOOP);
11428 }
11429
11430
Daniel Veillardcda96922001-08-21 10:56:31 +000011431 if (lst != NULL)
11432 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011433 if (string == NULL)
11434 return(-1);
11435
11436 size = xmlStrlen(string);
11437
11438 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11439 if (ctxt == NULL) return(-1);
11440 ctxt->userData = ctxt;
11441 if (sax != NULL) {
11442 oldsax = ctxt->sax;
11443 ctxt->sax = sax;
11444 if (user_data != NULL)
11445 ctxt->userData = user_data;
11446 }
11447 newDoc = xmlNewDoc(BAD_CAST "1.0");
11448 if (newDoc == NULL) {
11449 xmlFreeParserCtxt(ctxt);
11450 return(-1);
11451 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011452 if ((doc != NULL) && (doc->dict != NULL)) {
11453 xmlDictFree(ctxt->dict);
11454 ctxt->dict = doc->dict;
11455 xmlDictReference(ctxt->dict);
11456 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11457 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11458 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11459 ctxt->dictNames = 1;
11460 } else {
11461 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11462 }
Owen Taylor3473f882001-02-23 17:55:21 +000011463 if (doc != NULL) {
11464 newDoc->intSubset = doc->intSubset;
11465 newDoc->extSubset = doc->extSubset;
11466 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011467 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11468 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011469 if (sax != NULL)
11470 ctxt->sax = oldsax;
11471 xmlFreeParserCtxt(ctxt);
11472 newDoc->intSubset = NULL;
11473 newDoc->extSubset = NULL;
11474 xmlFreeDoc(newDoc);
11475 return(-1);
11476 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011477 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11478 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011479 if (doc == NULL) {
11480 ctxt->myDoc = newDoc;
11481 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011482 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011483 newDoc->children->doc = doc;
11484 }
11485 ctxt->instate = XML_PARSER_CONTENT;
11486 ctxt->depth = depth;
11487
11488 /*
11489 * Doing validity checking on chunk doesn't make sense
11490 */
11491 ctxt->validate = 0;
11492 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011493 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011494
Daniel Veillardb39bc392002-10-26 19:29:51 +000011495 if ( doc != NULL ){
11496 content = doc->children;
11497 doc->children = NULL;
11498 xmlParseContent(ctxt);
11499 doc->children = content;
11500 }
11501 else {
11502 xmlParseContent(ctxt);
11503 }
Owen Taylor3473f882001-02-23 17:55:21 +000011504 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011505 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011506 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011507 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011508 }
11509 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011510 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011511 }
11512
11513 if (!ctxt->wellFormed) {
11514 if (ctxt->errNo == 0)
11515 ret = 1;
11516 else
11517 ret = ctxt->errNo;
11518 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011519 ret = 0;
11520 }
11521
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011522 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11523 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011524
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011525 /*
11526 * Return the newly created nodeset after unlinking it from
11527 * they pseudo parent.
11528 */
11529 cur = newDoc->children->children;
11530 *lst = cur;
11531 while (cur != NULL) {
11532 xmlSetTreeDoc(cur, doc);
11533 cur->parent = NULL;
11534 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011535 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011536 newDoc->children->children = NULL;
11537 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011538
Owen Taylor3473f882001-02-23 17:55:21 +000011539 if (sax != NULL)
11540 ctxt->sax = oldsax;
11541 xmlFreeParserCtxt(ctxt);
11542 newDoc->intSubset = NULL;
11543 newDoc->extSubset = NULL;
11544 xmlFreeDoc(newDoc);
11545
11546 return(ret);
11547}
11548
11549/**
11550 * xmlSAXParseEntity:
11551 * @sax: the SAX handler block
11552 * @filename: the filename
11553 *
11554 * parse an XML external entity out of context and build a tree.
11555 * It use the given SAX function block to handle the parsing callback.
11556 * If sax is NULL, fallback to the default DOM tree building routines.
11557 *
11558 * [78] extParsedEnt ::= TextDecl? content
11559 *
11560 * This correspond to a "Well Balanced" chunk
11561 *
11562 * Returns the resulting document tree
11563 */
11564
11565xmlDocPtr
11566xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11567 xmlDocPtr ret;
11568 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011569
11570 ctxt = xmlCreateFileParserCtxt(filename);
11571 if (ctxt == NULL) {
11572 return(NULL);
11573 }
11574 if (sax != NULL) {
11575 if (ctxt->sax != NULL)
11576 xmlFree(ctxt->sax);
11577 ctxt->sax = sax;
11578 ctxt->userData = NULL;
11579 }
11580
Owen Taylor3473f882001-02-23 17:55:21 +000011581 xmlParseExtParsedEnt(ctxt);
11582
11583 if (ctxt->wellFormed)
11584 ret = ctxt->myDoc;
11585 else {
11586 ret = NULL;
11587 xmlFreeDoc(ctxt->myDoc);
11588 ctxt->myDoc = NULL;
11589 }
11590 if (sax != NULL)
11591 ctxt->sax = NULL;
11592 xmlFreeParserCtxt(ctxt);
11593
11594 return(ret);
11595}
11596
11597/**
11598 * xmlParseEntity:
11599 * @filename: the filename
11600 *
11601 * parse an XML external entity out of context and build a tree.
11602 *
11603 * [78] extParsedEnt ::= TextDecl? content
11604 *
11605 * This correspond to a "Well Balanced" chunk
11606 *
11607 * Returns the resulting document tree
11608 */
11609
11610xmlDocPtr
11611xmlParseEntity(const char *filename) {
11612 return(xmlSAXParseEntity(NULL, filename));
11613}
Daniel Veillard81273902003-09-30 00:43:48 +000011614#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011615
11616/**
11617 * xmlCreateEntityParserCtxt:
11618 * @URL: the entity URL
11619 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011620 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011621 *
11622 * Create a parser context for an external entity
11623 * Automatic support for ZLIB/Compress compressed document is provided
11624 * by default if found at compile-time.
11625 *
11626 * Returns the new parser context or NULL
11627 */
11628xmlParserCtxtPtr
11629xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11630 const xmlChar *base) {
11631 xmlParserCtxtPtr ctxt;
11632 xmlParserInputPtr inputStream;
11633 char *directory = NULL;
11634 xmlChar *uri;
11635
11636 ctxt = xmlNewParserCtxt();
11637 if (ctxt == NULL) {
11638 return(NULL);
11639 }
11640
11641 uri = xmlBuildURI(URL, base);
11642
11643 if (uri == NULL) {
11644 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11645 if (inputStream == NULL) {
11646 xmlFreeParserCtxt(ctxt);
11647 return(NULL);
11648 }
11649
11650 inputPush(ctxt, inputStream);
11651
11652 if ((ctxt->directory == NULL) && (directory == NULL))
11653 directory = xmlParserGetDirectory((char *)URL);
11654 if ((ctxt->directory == NULL) && (directory != NULL))
11655 ctxt->directory = directory;
11656 } else {
11657 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11658 if (inputStream == NULL) {
11659 xmlFree(uri);
11660 xmlFreeParserCtxt(ctxt);
11661 return(NULL);
11662 }
11663
11664 inputPush(ctxt, inputStream);
11665
11666 if ((ctxt->directory == NULL) && (directory == NULL))
11667 directory = xmlParserGetDirectory((char *)uri);
11668 if ((ctxt->directory == NULL) && (directory != NULL))
11669 ctxt->directory = directory;
11670 xmlFree(uri);
11671 }
Owen Taylor3473f882001-02-23 17:55:21 +000011672 return(ctxt);
11673}
11674
11675/************************************************************************
11676 * *
11677 * Front ends when parsing from a file *
11678 * *
11679 ************************************************************************/
11680
11681/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011682 * xmlCreateURLParserCtxt:
11683 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011684 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011685 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011686 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011687 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011688 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011689 *
11690 * Returns the new parser context or NULL
11691 */
11692xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011693xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011694{
11695 xmlParserCtxtPtr ctxt;
11696 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011697 char *directory = NULL;
11698
Owen Taylor3473f882001-02-23 17:55:21 +000011699 ctxt = xmlNewParserCtxt();
11700 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011701 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011702 return(NULL);
11703 }
11704
Daniel Veillarddf292f72005-01-16 19:00:15 +000011705 if (options)
11706 xmlCtxtUseOptions(ctxt, options);
11707 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011708
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011709 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011710 if (inputStream == NULL) {
11711 xmlFreeParserCtxt(ctxt);
11712 return(NULL);
11713 }
11714
Owen Taylor3473f882001-02-23 17:55:21 +000011715 inputPush(ctxt, inputStream);
11716 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011717 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011718 if ((ctxt->directory == NULL) && (directory != NULL))
11719 ctxt->directory = directory;
11720
11721 return(ctxt);
11722}
11723
Daniel Veillard61b93382003-11-03 14:28:31 +000011724/**
11725 * xmlCreateFileParserCtxt:
11726 * @filename: the filename
11727 *
11728 * Create a parser context for a file content.
11729 * Automatic support for ZLIB/Compress compressed document is provided
11730 * by default if found at compile-time.
11731 *
11732 * Returns the new parser context or NULL
11733 */
11734xmlParserCtxtPtr
11735xmlCreateFileParserCtxt(const char *filename)
11736{
11737 return(xmlCreateURLParserCtxt(filename, 0));
11738}
11739
Daniel Veillard81273902003-09-30 00:43:48 +000011740#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011741/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011742 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011743 * @sax: the SAX handler block
11744 * @filename: the filename
11745 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11746 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011747 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011748 *
11749 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11750 * compressed document is provided by default if found at compile-time.
11751 * It use the given SAX function block to handle the parsing callback.
11752 * If sax is NULL, fallback to the default DOM tree building routines.
11753 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011754 * User data (void *) is stored within the parser context in the
11755 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011756 *
Owen Taylor3473f882001-02-23 17:55:21 +000011757 * Returns the resulting document tree
11758 */
11759
11760xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011761xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11762 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011763 xmlDocPtr ret;
11764 xmlParserCtxtPtr ctxt;
11765 char *directory = NULL;
11766
Daniel Veillard635ef722001-10-29 11:48:19 +000011767 xmlInitParser();
11768
Owen Taylor3473f882001-02-23 17:55:21 +000011769 ctxt = xmlCreateFileParserCtxt(filename);
11770 if (ctxt == NULL) {
11771 return(NULL);
11772 }
11773 if (sax != NULL) {
11774 if (ctxt->sax != NULL)
11775 xmlFree(ctxt->sax);
11776 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011777 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011778 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011779 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011780 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011781 }
Owen Taylor3473f882001-02-23 17:55:21 +000011782
11783 if ((ctxt->directory == NULL) && (directory == NULL))
11784 directory = xmlParserGetDirectory(filename);
11785 if ((ctxt->directory == NULL) && (directory != NULL))
11786 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11787
Daniel Veillarddad3f682002-11-17 16:47:27 +000011788 ctxt->recovery = recovery;
11789
Owen Taylor3473f882001-02-23 17:55:21 +000011790 xmlParseDocument(ctxt);
11791
William M. Brackc07329e2003-09-08 01:57:30 +000011792 if ((ctxt->wellFormed) || recovery) {
11793 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011794 if (ret != NULL) {
11795 if (ctxt->input->buf->compressed > 0)
11796 ret->compression = 9;
11797 else
11798 ret->compression = ctxt->input->buf->compressed;
11799 }
William M. Brackc07329e2003-09-08 01:57:30 +000011800 }
Owen Taylor3473f882001-02-23 17:55:21 +000011801 else {
11802 ret = NULL;
11803 xmlFreeDoc(ctxt->myDoc);
11804 ctxt->myDoc = NULL;
11805 }
11806 if (sax != NULL)
11807 ctxt->sax = NULL;
11808 xmlFreeParserCtxt(ctxt);
11809
11810 return(ret);
11811}
11812
11813/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011814 * xmlSAXParseFile:
11815 * @sax: the SAX handler block
11816 * @filename: the filename
11817 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11818 * documents
11819 *
11820 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11821 * compressed document is provided by default if found at compile-time.
11822 * It use the given SAX function block to handle the parsing callback.
11823 * If sax is NULL, fallback to the default DOM tree building routines.
11824 *
11825 * Returns the resulting document tree
11826 */
11827
11828xmlDocPtr
11829xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11830 int recovery) {
11831 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11832}
11833
11834/**
Owen Taylor3473f882001-02-23 17:55:21 +000011835 * xmlRecoverDoc:
11836 * @cur: a pointer to an array of xmlChar
11837 *
11838 * parse an XML in-memory document and build a tree.
11839 * In the case the document is not Well Formed, a tree is built anyway
11840 *
11841 * Returns the resulting document tree
11842 */
11843
11844xmlDocPtr
11845xmlRecoverDoc(xmlChar *cur) {
11846 return(xmlSAXParseDoc(NULL, cur, 1));
11847}
11848
11849/**
11850 * xmlParseFile:
11851 * @filename: the filename
11852 *
11853 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11854 * compressed document is provided by default if found at compile-time.
11855 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011856 * Returns the resulting document tree if the file was wellformed,
11857 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011858 */
11859
11860xmlDocPtr
11861xmlParseFile(const char *filename) {
11862 return(xmlSAXParseFile(NULL, filename, 0));
11863}
11864
11865/**
11866 * xmlRecoverFile:
11867 * @filename: the filename
11868 *
11869 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11870 * compressed document is provided by default if found at compile-time.
11871 * In the case the document is not Well Formed, a tree is built anyway
11872 *
11873 * Returns the resulting document tree
11874 */
11875
11876xmlDocPtr
11877xmlRecoverFile(const char *filename) {
11878 return(xmlSAXParseFile(NULL, filename, 1));
11879}
11880
11881
11882/**
11883 * xmlSetupParserForBuffer:
11884 * @ctxt: an XML parser context
11885 * @buffer: a xmlChar * buffer
11886 * @filename: a file name
11887 *
11888 * Setup the parser context to parse a new buffer; Clears any prior
11889 * contents from the parser context. The buffer parameter must not be
11890 * NULL, but the filename parameter can be
11891 */
11892void
11893xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11894 const char* filename)
11895{
11896 xmlParserInputPtr input;
11897
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011898 if ((ctxt == NULL) || (buffer == NULL))
11899 return;
11900
Owen Taylor3473f882001-02-23 17:55:21 +000011901 input = xmlNewInputStream(ctxt);
11902 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011903 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011904 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011905 return;
11906 }
11907
11908 xmlClearParserCtxt(ctxt);
11909 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011910 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011911 input->base = buffer;
11912 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011913 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011914 inputPush(ctxt, input);
11915}
11916
11917/**
11918 * xmlSAXUserParseFile:
11919 * @sax: a SAX handler
11920 * @user_data: The user data returned on SAX callbacks
11921 * @filename: a file name
11922 *
11923 * parse an XML file and call the given SAX handler routines.
11924 * Automatic support for ZLIB/Compress compressed document is provided
11925 *
11926 * Returns 0 in case of success or a error number otherwise
11927 */
11928int
11929xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11930 const char *filename) {
11931 int ret = 0;
11932 xmlParserCtxtPtr ctxt;
11933
11934 ctxt = xmlCreateFileParserCtxt(filename);
11935 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011936#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011937 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011938#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011939 xmlFree(ctxt->sax);
11940 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011941 xmlDetectSAX2(ctxt);
11942
Owen Taylor3473f882001-02-23 17:55:21 +000011943 if (user_data != NULL)
11944 ctxt->userData = user_data;
11945
11946 xmlParseDocument(ctxt);
11947
11948 if (ctxt->wellFormed)
11949 ret = 0;
11950 else {
11951 if (ctxt->errNo != 0)
11952 ret = ctxt->errNo;
11953 else
11954 ret = -1;
11955 }
11956 if (sax != NULL)
11957 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000011958 if (ctxt->myDoc != NULL) {
11959 xmlFreeDoc(ctxt->myDoc);
11960 ctxt->myDoc = NULL;
11961 }
Owen Taylor3473f882001-02-23 17:55:21 +000011962 xmlFreeParserCtxt(ctxt);
11963
11964 return ret;
11965}
Daniel Veillard81273902003-09-30 00:43:48 +000011966#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011967
11968/************************************************************************
11969 * *
11970 * Front ends when parsing from memory *
11971 * *
11972 ************************************************************************/
11973
11974/**
11975 * xmlCreateMemoryParserCtxt:
11976 * @buffer: a pointer to a char array
11977 * @size: the size of the array
11978 *
11979 * Create a parser context for an XML in-memory document.
11980 *
11981 * Returns the new parser context or NULL
11982 */
11983xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011984xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011985 xmlParserCtxtPtr ctxt;
11986 xmlParserInputPtr input;
11987 xmlParserInputBufferPtr buf;
11988
11989 if (buffer == NULL)
11990 return(NULL);
11991 if (size <= 0)
11992 return(NULL);
11993
11994 ctxt = xmlNewParserCtxt();
11995 if (ctxt == NULL)
11996 return(NULL);
11997
Daniel Veillard53350552003-09-18 13:35:51 +000011998 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011999 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012000 if (buf == NULL) {
12001 xmlFreeParserCtxt(ctxt);
12002 return(NULL);
12003 }
Owen Taylor3473f882001-02-23 17:55:21 +000012004
12005 input = xmlNewInputStream(ctxt);
12006 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012007 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012008 xmlFreeParserCtxt(ctxt);
12009 return(NULL);
12010 }
12011
12012 input->filename = NULL;
12013 input->buf = buf;
12014 input->base = input->buf->buffer->content;
12015 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012016 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012017
12018 inputPush(ctxt, input);
12019 return(ctxt);
12020}
12021
Daniel Veillard81273902003-09-30 00:43:48 +000012022#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012023/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012024 * xmlSAXParseMemoryWithData:
12025 * @sax: the SAX handler block
12026 * @buffer: an pointer to a char array
12027 * @size: the size of the array
12028 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12029 * documents
12030 * @data: the userdata
12031 *
12032 * parse an XML in-memory block and use the given SAX function block
12033 * to handle the parsing callback. If sax is NULL, fallback to the default
12034 * DOM tree building routines.
12035 *
12036 * User data (void *) is stored within the parser context in the
12037 * context's _private member, so it is available nearly everywhere in libxml
12038 *
12039 * Returns the resulting document tree
12040 */
12041
12042xmlDocPtr
12043xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12044 int size, int recovery, void *data) {
12045 xmlDocPtr ret;
12046 xmlParserCtxtPtr ctxt;
12047
12048 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12049 if (ctxt == NULL) return(NULL);
12050 if (sax != NULL) {
12051 if (ctxt->sax != NULL)
12052 xmlFree(ctxt->sax);
12053 ctxt->sax = sax;
12054 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012055 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012056 if (data!=NULL) {
12057 ctxt->_private=data;
12058 }
12059
Daniel Veillardadba5f12003-04-04 16:09:01 +000012060 ctxt->recovery = recovery;
12061
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012062 xmlParseDocument(ctxt);
12063
12064 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12065 else {
12066 ret = NULL;
12067 xmlFreeDoc(ctxt->myDoc);
12068 ctxt->myDoc = NULL;
12069 }
12070 if (sax != NULL)
12071 ctxt->sax = NULL;
12072 xmlFreeParserCtxt(ctxt);
12073
12074 return(ret);
12075}
12076
12077/**
Owen Taylor3473f882001-02-23 17:55:21 +000012078 * xmlSAXParseMemory:
12079 * @sax: the SAX handler block
12080 * @buffer: an pointer to a char array
12081 * @size: the size of the array
12082 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12083 * documents
12084 *
12085 * parse an XML in-memory block and use the given SAX function block
12086 * to handle the parsing callback. If sax is NULL, fallback to the default
12087 * DOM tree building routines.
12088 *
12089 * Returns the resulting document tree
12090 */
12091xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012092xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12093 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012094 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012095}
12096
12097/**
12098 * xmlParseMemory:
12099 * @buffer: an pointer to a char array
12100 * @size: the size of the array
12101 *
12102 * parse an XML in-memory block and build a tree.
12103 *
12104 * Returns the resulting document tree
12105 */
12106
Daniel Veillard50822cb2001-07-26 20:05:51 +000012107xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012108 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12109}
12110
12111/**
12112 * xmlRecoverMemory:
12113 * @buffer: an pointer to a char array
12114 * @size: the size of the array
12115 *
12116 * parse an XML in-memory block and build a tree.
12117 * In the case the document is not Well Formed, a tree is built anyway
12118 *
12119 * Returns the resulting document tree
12120 */
12121
Daniel Veillard50822cb2001-07-26 20:05:51 +000012122xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012123 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12124}
12125
12126/**
12127 * xmlSAXUserParseMemory:
12128 * @sax: a SAX handler
12129 * @user_data: The user data returned on SAX callbacks
12130 * @buffer: an in-memory XML document input
12131 * @size: the length of the XML document in bytes
12132 *
12133 * A better SAX parsing routine.
12134 * parse an XML in-memory buffer and call the given SAX handler routines.
12135 *
12136 * Returns 0 in case of success or a error number otherwise
12137 */
12138int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012139 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012140 int ret = 0;
12141 xmlParserCtxtPtr ctxt;
12142 xmlSAXHandlerPtr oldsax = NULL;
12143
Daniel Veillard9e923512002-08-14 08:48:52 +000012144 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012145 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12146 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012147 oldsax = ctxt->sax;
12148 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012149 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012150 if (user_data != NULL)
12151 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012152
12153 xmlParseDocument(ctxt);
12154
12155 if (ctxt->wellFormed)
12156 ret = 0;
12157 else {
12158 if (ctxt->errNo != 0)
12159 ret = ctxt->errNo;
12160 else
12161 ret = -1;
12162 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012163 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012164 if (ctxt->myDoc != NULL) {
12165 xmlFreeDoc(ctxt->myDoc);
12166 ctxt->myDoc = NULL;
12167 }
Owen Taylor3473f882001-02-23 17:55:21 +000012168 xmlFreeParserCtxt(ctxt);
12169
12170 return ret;
12171}
Daniel Veillard81273902003-09-30 00:43:48 +000012172#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012173
12174/**
12175 * xmlCreateDocParserCtxt:
12176 * @cur: a pointer to an array of xmlChar
12177 *
12178 * Creates a parser context for an XML in-memory document.
12179 *
12180 * Returns the new parser context or NULL
12181 */
12182xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012183xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012184 int len;
12185
12186 if (cur == NULL)
12187 return(NULL);
12188 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012189 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012190}
12191
Daniel Veillard81273902003-09-30 00:43:48 +000012192#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012193/**
12194 * xmlSAXParseDoc:
12195 * @sax: the SAX handler block
12196 * @cur: a pointer to an array of xmlChar
12197 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12198 * documents
12199 *
12200 * parse an XML in-memory document and build a tree.
12201 * It use the given SAX function block to handle the parsing callback.
12202 * If sax is NULL, fallback to the default DOM tree building routines.
12203 *
12204 * Returns the resulting document tree
12205 */
12206
12207xmlDocPtr
12208xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
12209 xmlDocPtr ret;
12210 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012211 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012212
Daniel Veillard38936062004-11-04 17:45:11 +000012213 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012214
12215
12216 ctxt = xmlCreateDocParserCtxt(cur);
12217 if (ctxt == NULL) return(NULL);
12218 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012219 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012220 ctxt->sax = sax;
12221 ctxt->userData = NULL;
12222 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012223 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012224
12225 xmlParseDocument(ctxt);
12226 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12227 else {
12228 ret = NULL;
12229 xmlFreeDoc(ctxt->myDoc);
12230 ctxt->myDoc = NULL;
12231 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012232 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012233 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012234 xmlFreeParserCtxt(ctxt);
12235
12236 return(ret);
12237}
12238
12239/**
12240 * xmlParseDoc:
12241 * @cur: a pointer to an array of xmlChar
12242 *
12243 * parse an XML in-memory document and build a tree.
12244 *
12245 * Returns the resulting document tree
12246 */
12247
12248xmlDocPtr
12249xmlParseDoc(xmlChar *cur) {
12250 return(xmlSAXParseDoc(NULL, cur, 0));
12251}
Daniel Veillard81273902003-09-30 00:43:48 +000012252#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012253
Daniel Veillard81273902003-09-30 00:43:48 +000012254#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012255/************************************************************************
12256 * *
12257 * Specific function to keep track of entities references *
12258 * and used by the XSLT debugger *
12259 * *
12260 ************************************************************************/
12261
12262static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12263
12264/**
12265 * xmlAddEntityReference:
12266 * @ent : A valid entity
12267 * @firstNode : A valid first node for children of entity
12268 * @lastNode : A valid last node of children entity
12269 *
12270 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12271 */
12272static void
12273xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12274 xmlNodePtr lastNode)
12275{
12276 if (xmlEntityRefFunc != NULL) {
12277 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12278 }
12279}
12280
12281
12282/**
12283 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012284 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012285 *
12286 * Set the function to call call back when a xml reference has been made
12287 */
12288void
12289xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12290{
12291 xmlEntityRefFunc = func;
12292}
Daniel Veillard81273902003-09-30 00:43:48 +000012293#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012294
12295/************************************************************************
12296 * *
12297 * Miscellaneous *
12298 * *
12299 ************************************************************************/
12300
12301#ifdef LIBXML_XPATH_ENABLED
12302#include <libxml/xpath.h>
12303#endif
12304
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012305extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012306static int xmlParserInitialized = 0;
12307
12308/**
12309 * xmlInitParser:
12310 *
12311 * Initialization function for the XML parser.
12312 * This is not reentrant. Call once before processing in case of
12313 * use in multithreaded programs.
12314 */
12315
12316void
12317xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012318 if (xmlParserInitialized != 0)
12319 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012320
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012321 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12322 (xmlGenericError == NULL))
12323 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012324 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012325 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012326 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012327 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012328 xmlDefaultSAXHandlerInit();
12329 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012330#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012331 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012332#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012333#ifdef LIBXML_HTML_ENABLED
12334 htmlInitAutoClose();
12335 htmlDefaultSAXHandlerInit();
12336#endif
12337#ifdef LIBXML_XPATH_ENABLED
12338 xmlXPathInit();
12339#endif
12340 xmlParserInitialized = 1;
12341}
12342
12343/**
12344 * xmlCleanupParser:
12345 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012346 * Cleanup function for the XML library. It tries to reclaim all
12347 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012348 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012349 * function should not prevent reusing the library but one should
12350 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012351 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012352 */
12353
12354void
12355xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012356 if (!xmlParserInitialized)
12357 return;
12358
Owen Taylor3473f882001-02-23 17:55:21 +000012359 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012360#ifdef LIBXML_CATALOG_ENABLED
12361 xmlCatalogCleanup();
12362#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012363 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012364 xmlCleanupInputCallbacks();
12365#ifdef LIBXML_OUTPUT_ENABLED
12366 xmlCleanupOutputCallbacks();
12367#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012368#ifdef LIBXML_SCHEMAS_ENABLED
12369 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012370 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012371#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012372 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012373 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012374 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012375 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012376 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012377}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012378
12379/************************************************************************
12380 * *
12381 * New set (2.6.0) of simpler and more flexible APIs *
12382 * *
12383 ************************************************************************/
12384
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; @str is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe inside an unbraced if/else); the caller supplies
 * the terminating semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
12396
12397/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012398 * xmlCtxtReset:
12399 * @ctxt: an XML parser context
12400 *
12401 * Reset a parser context
12402 */
12403void
12404xmlCtxtReset(xmlParserCtxtPtr ctxt)
12405{
12406 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012407 xmlDictPtr dict;
12408
12409 if (ctxt == NULL)
12410 return;
12411
12412 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012413
12414 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12415 xmlFreeInputStream(input);
12416 }
12417 ctxt->inputNr = 0;
12418 ctxt->input = NULL;
12419
12420 ctxt->spaceNr = 0;
12421 ctxt->spaceTab[0] = -1;
12422 ctxt->space = &ctxt->spaceTab[0];
12423
12424
12425 ctxt->nodeNr = 0;
12426 ctxt->node = NULL;
12427
12428 ctxt->nameNr = 0;
12429 ctxt->name = NULL;
12430
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012431 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012432 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012433 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012434 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012435 DICT_FREE(ctxt->directory);
12436 ctxt->directory = NULL;
12437 DICT_FREE(ctxt->extSubURI);
12438 ctxt->extSubURI = NULL;
12439 DICT_FREE(ctxt->extSubSystem);
12440 ctxt->extSubSystem = NULL;
12441 if (ctxt->myDoc != NULL)
12442 xmlFreeDoc(ctxt->myDoc);
12443 ctxt->myDoc = NULL;
12444
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012445 ctxt->standalone = -1;
12446 ctxt->hasExternalSubset = 0;
12447 ctxt->hasPErefs = 0;
12448 ctxt->html = 0;
12449 ctxt->external = 0;
12450 ctxt->instate = XML_PARSER_START;
12451 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012452
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012453 ctxt->wellFormed = 1;
12454 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012455 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012456 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012457#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012458 ctxt->vctxt.userData = ctxt;
12459 ctxt->vctxt.error = xmlParserValidityError;
12460 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012461#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012462 ctxt->record_info = 0;
12463 ctxt->nbChars = 0;
12464 ctxt->checkIndex = 0;
12465 ctxt->inSubset = 0;
12466 ctxt->errNo = XML_ERR_OK;
12467 ctxt->depth = 0;
12468 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12469 ctxt->catalogs = NULL;
12470 xmlInitNodeInfoSeq(&ctxt->node_seq);
12471
12472 if (ctxt->attsDefault != NULL) {
12473 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12474 ctxt->attsDefault = NULL;
12475 }
12476 if (ctxt->attsSpecial != NULL) {
12477 xmlHashFree(ctxt->attsSpecial, NULL);
12478 ctxt->attsSpecial = NULL;
12479 }
12480
Daniel Veillard4432df22003-09-28 18:58:27 +000012481#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012482 if (ctxt->catalogs != NULL)
12483 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012484#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012485 if (ctxt->lastError.code != XML_ERR_OK)
12486 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012487}
12488
12489/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012490 * xmlCtxtResetPush:
12491 * @ctxt: an XML parser context
12492 * @chunk: a pointer to an array of chars
12493 * @size: number of chars in the array
12494 * @filename: an optional file name or URI
12495 * @encoding: the document encoding, or NULL
12496 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012497 * Reset a push parser context
12498 *
12499 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012500 */
12501int
12502xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12503 int size, const char *filename, const char *encoding)
12504{
12505 xmlParserInputPtr inputStream;
12506 xmlParserInputBufferPtr buf;
12507 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12508
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012509 if (ctxt == NULL)
12510 return(1);
12511
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012512 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12513 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12514
12515 buf = xmlAllocParserInputBuffer(enc);
12516 if (buf == NULL)
12517 return(1);
12518
12519 if (ctxt == NULL) {
12520 xmlFreeParserInputBuffer(buf);
12521 return(1);
12522 }
12523
12524 xmlCtxtReset(ctxt);
12525
12526 if (ctxt->pushTab == NULL) {
12527 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12528 sizeof(xmlChar *));
12529 if (ctxt->pushTab == NULL) {
12530 xmlErrMemory(ctxt, NULL);
12531 xmlFreeParserInputBuffer(buf);
12532 return(1);
12533 }
12534 }
12535
12536 if (filename == NULL) {
12537 ctxt->directory = NULL;
12538 } else {
12539 ctxt->directory = xmlParserGetDirectory(filename);
12540 }
12541
12542 inputStream = xmlNewInputStream(ctxt);
12543 if (inputStream == NULL) {
12544 xmlFreeParserInputBuffer(buf);
12545 return(1);
12546 }
12547
12548 if (filename == NULL)
12549 inputStream->filename = NULL;
12550 else
12551 inputStream->filename = (char *)
12552 xmlCanonicPath((const xmlChar *) filename);
12553 inputStream->buf = buf;
12554 inputStream->base = inputStream->buf->buffer->content;
12555 inputStream->cur = inputStream->buf->buffer->content;
12556 inputStream->end =
12557 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12558
12559 inputPush(ctxt, inputStream);
12560
12561 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12562 (ctxt->input->buf != NULL)) {
12563 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12564 int cur = ctxt->input->cur - ctxt->input->base;
12565
12566 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12567
12568 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12569 ctxt->input->cur = ctxt->input->base + cur;
12570 ctxt->input->end =
12571 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12572 use];
12573#ifdef DEBUG_PUSH
12574 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12575#endif
12576 }
12577
12578 if (encoding != NULL) {
12579 xmlCharEncodingHandlerPtr hdlr;
12580
12581 hdlr = xmlFindCharEncodingHandler(encoding);
12582 if (hdlr != NULL) {
12583 xmlSwitchToEncoding(ctxt, hdlr);
12584 } else {
12585 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12586 "Unsupported encoding %s\n", BAD_CAST encoding);
12587 }
12588 } else if (enc != XML_CHAR_ENCODING_NONE) {
12589 xmlSwitchEncoding(ctxt, enc);
12590 }
12591
12592 return(0);
12593}
12594
12595/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012596 * xmlCtxtUseOptions:
12597 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012598 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012599 *
12600 * Applies the options to the parser context
12601 *
12602 * Returns 0 in case of success, the set of unknown or unimplemented options
12603 * in case of error.
12604 */
12605int
12606xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12607{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012608 if (ctxt == NULL)
12609 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012610 if (options & XML_PARSE_RECOVER) {
12611 ctxt->recovery = 1;
12612 options -= XML_PARSE_RECOVER;
12613 } else
12614 ctxt->recovery = 0;
12615 if (options & XML_PARSE_DTDLOAD) {
12616 ctxt->loadsubset = XML_DETECT_IDS;
12617 options -= XML_PARSE_DTDLOAD;
12618 } else
12619 ctxt->loadsubset = 0;
12620 if (options & XML_PARSE_DTDATTR) {
12621 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12622 options -= XML_PARSE_DTDATTR;
12623 }
12624 if (options & XML_PARSE_NOENT) {
12625 ctxt->replaceEntities = 1;
12626 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12627 options -= XML_PARSE_NOENT;
12628 } else
12629 ctxt->replaceEntities = 0;
12630 if (options & XML_PARSE_NOWARNING) {
12631 ctxt->sax->warning = NULL;
12632 options -= XML_PARSE_NOWARNING;
12633 }
12634 if (options & XML_PARSE_NOERROR) {
12635 ctxt->sax->error = NULL;
12636 ctxt->sax->fatalError = NULL;
12637 options -= XML_PARSE_NOERROR;
12638 }
12639 if (options & XML_PARSE_PEDANTIC) {
12640 ctxt->pedantic = 1;
12641 options -= XML_PARSE_PEDANTIC;
12642 } else
12643 ctxt->pedantic = 0;
12644 if (options & XML_PARSE_NOBLANKS) {
12645 ctxt->keepBlanks = 0;
12646 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12647 options -= XML_PARSE_NOBLANKS;
12648 } else
12649 ctxt->keepBlanks = 1;
12650 if (options & XML_PARSE_DTDVALID) {
12651 ctxt->validate = 1;
12652 if (options & XML_PARSE_NOWARNING)
12653 ctxt->vctxt.warning = NULL;
12654 if (options & XML_PARSE_NOERROR)
12655 ctxt->vctxt.error = NULL;
12656 options -= XML_PARSE_DTDVALID;
12657 } else
12658 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012659#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012660 if (options & XML_PARSE_SAX1) {
12661 ctxt->sax->startElement = xmlSAX2StartElement;
12662 ctxt->sax->endElement = xmlSAX2EndElement;
12663 ctxt->sax->startElementNs = NULL;
12664 ctxt->sax->endElementNs = NULL;
12665 ctxt->sax->initialized = 1;
12666 options -= XML_PARSE_SAX1;
12667 }
Daniel Veillard81273902003-09-30 00:43:48 +000012668#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012669 if (options & XML_PARSE_NODICT) {
12670 ctxt->dictNames = 0;
12671 options -= XML_PARSE_NODICT;
12672 } else {
12673 ctxt->dictNames = 1;
12674 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012675 if (options & XML_PARSE_NOCDATA) {
12676 ctxt->sax->cdataBlock = NULL;
12677 options -= XML_PARSE_NOCDATA;
12678 }
12679 if (options & XML_PARSE_NSCLEAN) {
12680 ctxt->options |= XML_PARSE_NSCLEAN;
12681 options -= XML_PARSE_NSCLEAN;
12682 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012683 if (options & XML_PARSE_NONET) {
12684 ctxt->options |= XML_PARSE_NONET;
12685 options -= XML_PARSE_NONET;
12686 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012687 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012688 return (options);
12689}
12690
12691/**
12692 * xmlDoRead:
12693 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012694 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012695 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012696 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012697 * @reuse: keep the context for reuse
12698 *
12699 * Common front-end for the xmlRead functions
12700 *
12701 * Returns the resulting document tree or NULL
12702 */
12703static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012704xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12705 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012706{
12707 xmlDocPtr ret;
12708
12709 xmlCtxtUseOptions(ctxt, options);
12710 if (encoding != NULL) {
12711 xmlCharEncodingHandlerPtr hdlr;
12712
12713 hdlr = xmlFindCharEncodingHandler(encoding);
12714 if (hdlr != NULL)
12715 xmlSwitchToEncoding(ctxt, hdlr);
12716 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012717 if ((URL != NULL) && (ctxt->input != NULL) &&
12718 (ctxt->input->filename == NULL))
12719 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012720 xmlParseDocument(ctxt);
12721 if ((ctxt->wellFormed) || ctxt->recovery)
12722 ret = ctxt->myDoc;
12723 else {
12724 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012725 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012726 xmlFreeDoc(ctxt->myDoc);
12727 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012728 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012729 ctxt->myDoc = NULL;
12730 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012731 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012732 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012733
12734 return (ret);
12735}
12736
12737/**
12738 * xmlReadDoc:
12739 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012740 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012741 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012742 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012743 *
12744 * parse an XML in-memory document and build a tree.
12745 *
12746 * Returns the resulting document tree
12747 */
12748xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012749xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012750{
12751 xmlParserCtxtPtr ctxt;
12752
12753 if (cur == NULL)
12754 return (NULL);
12755
12756 ctxt = xmlCreateDocParserCtxt(cur);
12757 if (ctxt == NULL)
12758 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012759 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012760}
12761
12762/**
12763 * xmlReadFile:
12764 * @filename: a file or URL
12765 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012766 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012767 *
12768 * parse an XML file from the filesystem or the network.
12769 *
12770 * Returns the resulting document tree
12771 */
12772xmlDocPtr
12773xmlReadFile(const char *filename, const char *encoding, int options)
12774{
12775 xmlParserCtxtPtr ctxt;
12776
Daniel Veillard61b93382003-11-03 14:28:31 +000012777 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012778 if (ctxt == NULL)
12779 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012780 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012781}
12782
12783/**
12784 * xmlReadMemory:
12785 * @buffer: a pointer to a char array
12786 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012787 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012788 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012789 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012790 *
12791 * parse an XML in-memory document and build a tree.
12792 *
12793 * Returns the resulting document tree
12794 */
12795xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012796xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012797{
12798 xmlParserCtxtPtr ctxt;
12799
12800 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12801 if (ctxt == NULL)
12802 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012803 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012804}
12805
12806/**
12807 * xmlReadFd:
12808 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012809 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012810 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012811 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012812 *
12813 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012814 * NOTE that the file descriptor will not be closed when the
12815 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012816 *
12817 * Returns the resulting document tree
12818 */
12819xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012820xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012821{
12822 xmlParserCtxtPtr ctxt;
12823 xmlParserInputBufferPtr input;
12824 xmlParserInputPtr stream;
12825
12826 if (fd < 0)
12827 return (NULL);
12828
12829 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12830 if (input == NULL)
12831 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012832 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012833 ctxt = xmlNewParserCtxt();
12834 if (ctxt == NULL) {
12835 xmlFreeParserInputBuffer(input);
12836 return (NULL);
12837 }
12838 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12839 if (stream == NULL) {
12840 xmlFreeParserInputBuffer(input);
12841 xmlFreeParserCtxt(ctxt);
12842 return (NULL);
12843 }
12844 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012845 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012846}
12847
12848/**
12849 * xmlReadIO:
12850 * @ioread: an I/O read function
12851 * @ioclose: an I/O close function
12852 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012853 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012854 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012855 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012856 *
12857 * parse an XML document from I/O functions and source and build a tree.
12858 *
12859 * Returns the resulting document tree
12860 */
12861xmlDocPtr
12862xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012863 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012864{
12865 xmlParserCtxtPtr ctxt;
12866 xmlParserInputBufferPtr input;
12867 xmlParserInputPtr stream;
12868
12869 if (ioread == NULL)
12870 return (NULL);
12871
12872 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12873 XML_CHAR_ENCODING_NONE);
12874 if (input == NULL)
12875 return (NULL);
12876 ctxt = xmlNewParserCtxt();
12877 if (ctxt == NULL) {
12878 xmlFreeParserInputBuffer(input);
12879 return (NULL);
12880 }
12881 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12882 if (stream == NULL) {
12883 xmlFreeParserInputBuffer(input);
12884 xmlFreeParserCtxt(ctxt);
12885 return (NULL);
12886 }
12887 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012888 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012889}
12890
12891/**
12892 * xmlCtxtReadDoc:
12893 * @ctxt: an XML parser context
12894 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012895 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012896 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012897 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012898 *
12899 * parse an XML in-memory document and build a tree.
12900 * This reuses the existing @ctxt parser context
12901 *
12902 * Returns the resulting document tree
12903 */
12904xmlDocPtr
12905xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012906 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012907{
12908 xmlParserInputPtr stream;
12909
12910 if (cur == NULL)
12911 return (NULL);
12912 if (ctxt == NULL)
12913 return (NULL);
12914
12915 xmlCtxtReset(ctxt);
12916
12917 stream = xmlNewStringInputStream(ctxt, cur);
12918 if (stream == NULL) {
12919 return (NULL);
12920 }
12921 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012922 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012923}
12924
12925/**
12926 * xmlCtxtReadFile:
12927 * @ctxt: an XML parser context
12928 * @filename: a file or URL
12929 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012930 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012931 *
12932 * parse an XML file from the filesystem or the network.
12933 * This reuses the existing @ctxt parser context
12934 *
12935 * Returns the resulting document tree
12936 */
12937xmlDocPtr
12938xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12939 const char *encoding, int options)
12940{
12941 xmlParserInputPtr stream;
12942
12943 if (filename == NULL)
12944 return (NULL);
12945 if (ctxt == NULL)
12946 return (NULL);
12947
12948 xmlCtxtReset(ctxt);
12949
Daniel Veillard29614c72004-11-26 10:47:26 +000012950 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012951 if (stream == NULL) {
12952 return (NULL);
12953 }
12954 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012955 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012956}
12957
12958/**
12959 * xmlCtxtReadMemory:
12960 * @ctxt: an XML parser context
12961 * @buffer: a pointer to a char array
12962 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012963 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012964 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012965 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012966 *
12967 * parse an XML in-memory document and build a tree.
12968 * This reuses the existing @ctxt parser context
12969 *
12970 * Returns the resulting document tree
12971 */
12972xmlDocPtr
12973xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012974 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012975{
12976 xmlParserInputBufferPtr input;
12977 xmlParserInputPtr stream;
12978
12979 if (ctxt == NULL)
12980 return (NULL);
12981 if (buffer == NULL)
12982 return (NULL);
12983
12984 xmlCtxtReset(ctxt);
12985
12986 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12987 if (input == NULL) {
12988 return(NULL);
12989 }
12990
12991 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12992 if (stream == NULL) {
12993 xmlFreeParserInputBuffer(input);
12994 return(NULL);
12995 }
12996
12997 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012998 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012999}
13000
13001/**
13002 * xmlCtxtReadFd:
13003 * @ctxt: an XML parser context
13004 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013005 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013006 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013007 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008 *
13009 * parse an XML from a file descriptor and build a tree.
13010 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013011 * NOTE that the file descriptor will not be closed when the
13012 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013013 *
13014 * Returns the resulting document tree
13015 */
13016xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013017xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13018 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013019{
13020 xmlParserInputBufferPtr input;
13021 xmlParserInputPtr stream;
13022
13023 if (fd < 0)
13024 return (NULL);
13025 if (ctxt == NULL)
13026 return (NULL);
13027
13028 xmlCtxtReset(ctxt);
13029
13030
13031 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13032 if (input == NULL)
13033 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013034 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013035 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13036 if (stream == NULL) {
13037 xmlFreeParserInputBuffer(input);
13038 return (NULL);
13039 }
13040 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013041 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013042}
13043
13044/**
13045 * xmlCtxtReadIO:
13046 * @ctxt: an XML parser context
13047 * @ioread: an I/O read function
13048 * @ioclose: an I/O close function
13049 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013050 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013051 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013052 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013053 *
13054 * parse an XML document from I/O functions and source and build a tree.
13055 * This reuses the existing @ctxt parser context
13056 *
13057 * Returns the resulting document tree
13058 */
13059xmlDocPtr
13060xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13061 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013062 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013063 const char *encoding, int options)
13064{
13065 xmlParserInputBufferPtr input;
13066 xmlParserInputPtr stream;
13067
13068 if (ioread == NULL)
13069 return (NULL);
13070 if (ctxt == NULL)
13071 return (NULL);
13072
13073 xmlCtxtReset(ctxt);
13074
13075 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13076 XML_CHAR_ENCODING_NONE);
13077 if (input == NULL)
13078 return (NULL);
13079 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13080 if (stream == NULL) {
13081 xmlFreeParserInputBuffer(input);
13082 return (NULL);
13083 }
13084 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013085 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013086}