blob: ec7fb53134212466707e5490613d3e719f32fb38 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary upper bound on the depth of the XML documents the parser
 * accepts to process. It is a safety boundary rather than an intrinsic
 * limitation of the parser; the default value is 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Compile-time switch set to 1 in this file. */
#define SAX2 1

/* Two size constants, a large one (300) and a small one (100). */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array itself are read-only, so the array is declared const as well.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
596 * Returns a non-zero value if the feature exists, otherwise zero.
597 * Returns zero (0) if the feature does not exist or an
598 * unknown feature is requested, non-zero otherwise.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
/*
 * xmlLangIsLetter:
 * @c:  a character value
 *
 * Returns 1 if @c is in [a-z] or [A-Z], 0 otherwise
 */
static int
xmlLangIsLetter(int c)
{
    return ((((c >= 'A') && (c <= 'Z')) ||
             ((c >= 'a') && (c <= 'z'))) ? 1 : 0);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * An IanaCode or UserCode must carry at least one letter after the
 * dash, so "i-" and "x-" alone are rejected.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * IANA code or User code: the '+' in productions [36] and [37]
         * requires one letter or more after the dash.
         */
        cur += 2;
        if (!xmlLangIsLetter(cur[0]))
            return (0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    } else if (xmlLangIsLetter(cur[0]) && xmlLangIsLetter(cur[1])) {
        /*
         * ISO639: exactly two letters. cur[1] is only read when cur[0]
         * is a letter, hence never past the terminating 0.
         */
        cur += 2;
    } else {
        return (0);
    }

    /*
     * Any number of '-' Subcode, each with at least one letter
     */
    while (cur[0] != 0) {
        if (cur[0] != '-')
            return (0);
        cur++;
        if (!xmlLangIsLetter(cur[0]))
            return (0);
        while (xmlLangIsLetter(cur[0]))
            cur++;
    }
    return (1);
}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
1425/*
1426 * Macros for accessing the content. Those should be used only by the parser,
1427 * and not exported.
1428 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
1431 *
1432 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1433 * To be used with extreme caution since operations consuming
1434 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1439 * RAW same as CUR but in the input buffer, bypass any token
1440 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
1443 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001444 * strings without newlines within the parser.
1445 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1446 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001447 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1448 *
1449 * NEXT Skip to the next character, this does the proper decoding
1450 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001451 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001452 * CUR_CHAR(l) returns the current unicode character (int), set l
1453 * to the number of xmlChars used for the encoding [0-5].
1454 * CUR_SCHAR same but operate on a string instead of the context
1455 * COPY_BUF copy the current unicode char to the target buffer, increment
1456 * the index
1457 * GROW, SHRINK handling of input buffers
1458 */
1459
/*
 * Raw accessors on the current input of the parser context; they all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s are equal to
 * c1 ... cn, compared as unsigned char values. Comparison stops at the
 * first mismatch.
 * The arguments are parenthesized so that a pointer expression such as
 * CMP4(p + 1, ...) is cast as a whole, whatever the type of p.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1482
/*
 * SKIP(val): advance the current input by val xmlChar, adding val to both
 * the character counter and the column. The line number is not touched,
 * so the skipped bytes must not contain a newline. After the move a '%'
 * is handed to xmlParserHandlePEReference(), and when the cursor sits on
 * a 0 byte and the input cannot be grown the input is popped.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the bytes one at a time so that a
 * '\n' increments the line number and resets the column to 1.
 * The val parameter is parenthesized so that an expression argument
 * compares correctly against the loop counter.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1505
Daniel Veillarda880b122003-04-21 21:36:41 +00001506#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001507 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1508 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001509 xmlSHRINK (ctxt);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
Daniel Veillarda880b122003-04-21 21:36:41 +00001518#define GROW if ((ctxt->progressive == 0) && \
1519 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001520 xmlGROW (ctxt);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
/* Skip blanks on the current input, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting one column and one
 * character; the line number is not touched. When the cursor lands on a
 * 0 byte the input is asked to grow.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): skip the current character, l xmlChars long, updating the
 * line/column position from the first byte, then give a '%' to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character of the input, resp. of string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: a plain byte store when l is 1, xmlCopyCharMultiByte()
 * otherwise.
 * NOTE: expands to an unbraced if/else without trailing semicolon; use
 * it as a full statement inside braces.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
/*
 * Macro used to grow the current buffer: doubles the allocation of
 * "buffer", whose capacity is kept in the variable "<buffer>_size".
 * On allocation failure it jumps to the label mem_error of the enclosing
 * function; the old buffer is still valid at that point and the recorded
 * size is left untouched so that it keeps matching the allocation.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *tmp;                                                       \
    tmp = (xmlChar *)                                                   \
            xmlRealloc(buffer, 2 * buffer##_size * sizeof(xmlChar));    \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size *= 2;                                                 \
}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002180 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2181 growBuffer(buffer);
2182 }
Owen Taylor3473f882001-02-23 17:55:21 +00002183 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2184 if (xmlParserDebugEntities)
2185 xmlGenericError(xmlGenericErrorContext,
2186 "String decoding Entity Reference: %.30s\n",
2187 str);
2188 ent = xmlParseStringEntityRef(ctxt, &str);
2189 if ((ent != NULL) &&
2190 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2191 if (ent->content != NULL) {
2192 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002193 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2194 growBuffer(buffer);
2195 }
Owen Taylor3473f882001-02-23 17:55:21 +00002196 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002197 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2198 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002199 }
2200 } else if ((ent != NULL) && (ent->content != NULL)) {
2201 xmlChar *rep;
2202
2203 ctxt->depth++;
2204 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2205 0, 0, 0);
2206 ctxt->depth--;
2207 if (rep != NULL) {
2208 current = rep;
2209 while (*current != 0) { /* non input consuming loop */
2210 buffer[nbchars++] = *current++;
2211 if (nbchars >
2212 buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 }
2216 xmlFree(rep);
2217 }
2218 } else if (ent != NULL) {
2219 int i = xmlStrlen(ent->name);
2220 const xmlChar *cur = ent->name;
2221
2222 buffer[nbchars++] = '&';
2223 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2224 growBuffer(buffer);
2225 }
2226 for (;i > 0;i--)
2227 buffer[nbchars++] = *cur++;
2228 buffer[nbchars++] = ';';
2229 }
2230 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
2233 "String decoding PE Reference: %.30s\n", str);
2234 ent = xmlParseStringPEReference(ctxt, &str);
2235 if (ent != NULL) {
2236 xmlChar *rep;
2237
2238 ctxt->depth++;
2239 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2240 0, 0, 0);
2241 ctxt->depth--;
2242 if (rep != NULL) {
2243 current = rep;
2244 while (*current != 0) { /* non input consuming loop */
2245 buffer[nbchars++] = *current++;
2246 if (nbchars >
2247 buffer_size - XML_PARSER_BUFFER_SIZE) {
2248 growBuffer(buffer);
2249 }
2250 }
2251 xmlFree(rep);
2252 }
2253 }
2254 } else {
2255 COPY_BUF(l,buffer,nbchars,c);
2256 str += l;
2257 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2258 growBuffer(buffer);
2259 }
2260 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002261 if (str < last)
2262 c = CUR_SCHAR(str, l);
2263 else
2264 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266 buffer[nbchars++] = 0;
2267 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002268
2269mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272}
2273
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274/**
2275 * xmlStringDecodeEntities:
2276 * @ctxt: the parser context
2277 * @str: the input string
2278 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2279 * @end: an end marker xmlChar, 0 if none
2280 * @end2: an end marker xmlChar, 0 if none
2281 * @end3: an end marker xmlChar, 0 if none
2282 *
2283 * Takes a entity string content and process to do the adequate substitutions.
2284 *
2285 * [67] Reference ::= EntityRef | CharRef
2286 *
2287 * [69] PEReference ::= '%' Name ';'
2288 *
2289 * Returns A newly allocated string with the substitution done. The caller
2290 * must deallocate it !
2291 */
2292xmlChar *
2293xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2294 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002295 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002296 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2297 end, end2, end3));
2298}
Owen Taylor3473f882001-02-23 17:55:21 +00002299
2300/************************************************************************
2301 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002302 * Commodity functions, cleanup needed ? *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * areBlanks:
2308 * @ctxt: an XML parser context
2309 * @str: a xmlChar *
2310 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002311 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002312 *
2313 * Is this a sequence of blank chars that one can ignore ?
2314 *
2315 * Returns 1 if ignorable 0 otherwise.
2316 */
2317
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002318static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 int i, ret;
2321 xmlNodePtr lastChild;
2322
Daniel Veillard05c13a22001-09-09 08:38:09 +00002323 /*
2324 * Don't spend time trying to differentiate them, the same callback is
2325 * used !
2326 */
2327 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002328 return(0);
2329
Owen Taylor3473f882001-02-23 17:55:21 +00002330 /*
2331 * Check for xml:space value.
2332 */
2333 if (*(ctxt->space) == 1)
2334 return(0);
2335
2336 /*
2337 * Check that the string is made of blanks
2338 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002339 if (blank_chars == 0) {
2340 for (i = 0;i < len;i++)
2341 if (!(IS_BLANK_CH(str[i]))) return(0);
2342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343
2344 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002345 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002346 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002347 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (ctxt->myDoc != NULL) {
2349 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2350 if (ret == 0) return(1);
2351 if (ret == 1) return(0);
2352 }
2353
2354 /*
2355 * Otherwise, heuristic :-\
2356 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002357 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if ((ctxt->node->children == NULL) &&
2359 (RAW == '<') && (NXT(1) == '/')) return(0);
2360
2361 lastChild = xmlGetLastChild(ctxt->node);
2362 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002363 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2364 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if (xmlNodeIsText(lastChild))
2366 return(0);
2367 else if ((ctxt->node->children != NULL) &&
2368 (xmlNodeIsText(ctxt->node->children)))
2369 return(0);
2370 return(1);
2371}
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373/************************************************************************
2374 * *
2375 * Extra stuff for namespace support *
2376 * Relates to http://www.w3.org/TR/WD-xml-names *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * xmlSplitQName:
2382 * @ctxt: an XML parser context
2383 * @name: an XML parser context
2384 * @prefix: a xmlChar **
2385 *
2386 * parse an UTF8 encoded XML qualified name string
2387 *
2388 * [NS 5] QName ::= (Prefix ':')? LocalPart
2389 *
2390 * [NS 6] Prefix ::= NCName
2391 *
2392 * [NS 7] LocalPart ::= NCName
2393 *
2394 * Returns the local part, and prefix is updated
2395 * to get the Prefix if any.
2396 */
2397
2398xmlChar *
2399xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2400 xmlChar buf[XML_MAX_NAMELEN + 5];
2401 xmlChar *buffer = NULL;
2402 int len = 0;
2403 int max = XML_MAX_NAMELEN;
2404 xmlChar *ret = NULL;
2405 const xmlChar *cur = name;
2406 int c;
2407
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002408 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 *prefix = NULL;
2410
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002411 if (cur == NULL) return(NULL);
2412
Owen Taylor3473f882001-02-23 17:55:21 +00002413#ifndef XML_XML_NAMESPACE
2414 /* xml: prefix is not really a namespace */
2415 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2416 (cur[2] == 'l') && (cur[3] == ':'))
2417 return(xmlStrdup(name));
2418#endif
2419
Daniel Veillard597bc482003-07-24 16:08:28 +00002420 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if (cur[0] == ':')
2422 return(xmlStrdup(name));
2423
2424 c = *cur++;
2425 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2426 buf[len++] = c;
2427 c = *cur++;
2428 }
2429 if (len >= max) {
2430 /*
2431 * Okay someone managed to make a huge name, so he's ready to pay
2432 * for the processing speed.
2433 */
2434 max = len * 2;
2435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441 memcpy(buffer, buf, len);
2442 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2443 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002444 xmlChar *tmp;
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002447 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002448 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002449 if (tmp == NULL) {
2450 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(NULL);
2453 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002454 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 }
2456 buffer[len++] = c;
2457 c = *cur++;
2458 }
2459 buffer[len] = 0;
2460 }
2461
Daniel Veillard597bc482003-07-24 16:08:28 +00002462 /* nasty but well=formed
2463 if ((c == ':') && (*cur == 0)) {
2464 return(xmlStrdup(name));
2465 } */
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buffer == NULL)
2468 ret = xmlStrndup(buf, len);
2469 else {
2470 ret = buffer;
2471 buffer = NULL;
2472 max = XML_MAX_NAMELEN;
2473 }
2474
2475
2476 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002477 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002480 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002481 }
Owen Taylor3473f882001-02-23 17:55:21 +00002482 len = 0;
2483
Daniel Veillardbb284f42002-10-16 18:02:47 +00002484 /*
2485 * Check that the first character is proper to start
2486 * a new name
2487 */
2488 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2489 ((c >= 0x41) && (c <= 0x5A)) ||
2490 (c == '_') || (c == ':'))) {
2491 int l;
2492 int first = CUR_SCHAR(cur, l);
2493
2494 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002495 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002496 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002497 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002498 }
2499 }
2500 cur++;
2501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2503 buf[len++] = c;
2504 c = *cur++;
2505 }
2506 if (len >= max) {
2507 /*
2508 * Okay someone managed to make a huge name, so he's ready to pay
2509 * for the processing speed.
2510 */
2511 max = len * 2;
2512
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(NULL);
2517 }
2518 memcpy(buffer, buf, len);
2519 while (c != 0) { /* tested bigname2.xml */
2520 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002521 xmlChar *tmp;
2522
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002571 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002625 register const xmlChar *cmp = other;
2626 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002635 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002636 }
William M. Brack76e95df2003-10-18 16:20:14 +00002637 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638 /* success */
2639 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 /* failure (or end of input buffer), check with full function */
2643 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644 /* strings coming from the dictionnary direct compare possible */
2645 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002646 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647 }
2648 return ret;
2649}
2650
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002651static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002653 int len = 0, l;
2654 int c;
2655 int count = 0;
2656
2657 /*
2658 * Handler for more complex cases
2659 */
2660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 c = CUR_CHAR(l);
2662 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2663 (!IS_LETTER(c) && (c != '_') &&
2664 (c != ':'))) {
2665 return(NULL);
2666 }
2667
2668 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002669 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002670 (c == '.') || (c == '-') ||
2671 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002672 (IS_COMBINING(c)) ||
2673 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (count++ > 100) {
2675 count = 0;
2676 GROW;
2677 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 NEXTL(l);
2680 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard96688262005-08-23 18:14:12 +00002682 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2683 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002685}
2686
2687/**
2688 * xmlParseStringName:
2689 * @ctxt: an XML parser context
2690 * @str: a pointer to the string pointer (IN/OUT)
2691 *
2692 * parse an XML name.
2693 *
2694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2695 * CombiningChar | Extender
2696 *
2697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2698 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002699 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002702 * is updated to the current location in the string.
2703 */
2704
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002705static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002706xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2707 xmlChar buf[XML_MAX_NAMELEN + 5];
2708 const xmlChar *cur = *str;
2709 int len = 0, l;
2710 int c;
2711
2712 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002713 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002714 (c != ':')) {
2715 return(NULL);
2716 }
2717
William M. Brack871611b2003-10-18 04:53:14 +00002718 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002719 (c == '.') || (c == '-') ||
2720 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002721 (IS_COMBINING(c)) ||
2722 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723 COPY_BUF(l,buf,len,c);
2724 cur += l;
2725 c = CUR_SCHAR(cur, l);
2726 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2727 /*
2728 * Okay someone managed to make a huge name, so he's ready to pay
2729 * for the processing speed.
2730 */
2731 xmlChar *buffer;
2732 int max = len * 2;
2733
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002734 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002735 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(NULL);
2738 }
2739 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002741 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002742 (c == '.') || (c == '-') ||
2743 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002744 (IS_COMBINING(c)) ||
2745 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002749 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002750 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002753 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
2755 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 COPY_BUF(l,buffer,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 }
2762 buffer[len] = 0;
2763 *str = cur;
2764 return(buffer);
2765 }
2766 }
2767 *str = cur;
2768 return(xmlStrndup(buf, len));
2769}
2770
2771/**
2772 * xmlParseNmtoken:
2773 * @ctxt: an XML parser context
2774 *
2775 * parse an XML Nmtoken.
2776 *
2777 * [7] Nmtoken ::= (NameChar)+
2778 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002779 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002780 *
2781 * Returns the Nmtoken parsed or NULL
2782 */
2783
2784xmlChar *
2785xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2786 xmlChar buf[XML_MAX_NAMELEN + 5];
2787 int len = 0, l;
2788 int c;
2789 int count = 0;
2790
2791 GROW;
2792 c = CUR_CHAR(l);
2793
William M. Brack871611b2003-10-18 04:53:14 +00002794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 (c == '.') || (c == '-') ||
2796 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002797 (IS_COMBINING(c)) ||
2798 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 if (count++ > 100) {
2800 count = 0;
2801 GROW;
2802 }
2803 COPY_BUF(l,buf,len,c);
2804 NEXTL(l);
2805 c = CUR_CHAR(l);
2806 if (len >= XML_MAX_NAMELEN) {
2807 /*
2808 * Okay someone managed to make a huge token, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlChar *tmp;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002838 return(NULL);
2839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 }
2842 COPY_BUF(l,buffer,len,c);
2843 NEXTL(l);
2844 c = CUR_CHAR(l);
2845 }
2846 buffer[len] = 0;
2847 return(buffer);
2848 }
2849 }
2850 if (len == 0)
2851 return(NULL);
2852 return(xmlStrndup(buf, len));
2853}
2854
2855/**
2856 * xmlParseEntityValue:
2857 * @ctxt: an XML parser context
2858 * @orig: if non-NULL store a copy of the original entity value
2859 *
2860 * parse a value for ENTITY declarations
2861 *
2862 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2863 * "'" ([^%&'] | PEReference | Reference)* "'"
2864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002865 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002866 */
2867
2868xmlChar *
2869xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2870 xmlChar *buf = NULL;
2871 int len = 0;
2872 int size = XML_PARSER_BUFFER_SIZE;
2873 int c, l;
2874 xmlChar stop;
2875 xmlChar *ret = NULL;
2876 const xmlChar *cur = NULL;
2877 xmlParserInputPtr input;
2878
2879 if (RAW == '"') stop = '"';
2880 else if (RAW == '\'') stop = '\'';
2881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(NULL);
2884 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890
2891 /*
2892 * The content of the entity definition is copied in a buffer.
2893 */
2894
2895 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2896 input = ctxt->input;
2897 GROW;
2898 NEXT;
2899 c = CUR_CHAR(l);
2900 /*
2901 * NOTE: 4.4.5 Included in Literal
2902 * When a parameter entity reference appears in a literal entity
2903 * value, ... a single or double quote character in the replacement
2904 * text is always treated as a normal data character and will not
2905 * terminate the literal.
2906 * In practice it means we stop the loop only when back at parsing
2907 * the initial entity and the quote is found
2908 */
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 (ctxt->input != input))) {
2911 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlChar *tmp;
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002918 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002919 return(NULL);
2920 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002921 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
2923 COPY_BUF(l,buf,len,c);
2924 NEXTL(l);
2925 /*
2926 * Pop-up of finished entities.
2927 */
2928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2929 xmlPopInput(ctxt);
2930
2931 GROW;
2932 c = CUR_CHAR(l);
2933 if (c == 0) {
2934 GROW;
2935 c = CUR_CHAR(l);
2936 }
2937 }
2938 buf[len] = 0;
2939
2940 /*
2941 * Raise problem w.r.t. '&' and '%' being used in non-entities
2942 * reference constructs. Note Charref will be handled in
2943 * xmlStringDecodeEntities()
2944 */
2945 cur = buf;
2946 while (*cur != 0) { /* non input consuming */
2947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2948 xmlChar *name;
2949 xmlChar tmp = *cur;
2950
2951 cur++;
2952 name = xmlParseStringName(ctxt, &cur);
2953 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002956 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002958 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2959 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 }
2962 if (name != NULL)
2963 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002964 if (*cur == 0)
2965 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 cur++;
2968 }
2969
2970 /*
2971 * Then PEReference entities are substituted.
2972 */
2973 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002974 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002975 xmlFree(buf);
2976 } else {
2977 NEXT;
2978 /*
2979 * NOTE: 4.4.7 Bypassed
2980 * When a general entity reference appears in the EntityValue in
2981 * an entity declaration, it is bypassed and left as is.
2982 * so XML_SUBSTITUTE_REF is not set here.
2983 */
2984 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2985 0, 0, 0);
2986 if (orig != NULL)
2987 *orig = buf;
2988 else
2989 xmlFree(buf);
2990 }
2991
2992 return(ret);
2993}
2994
2995/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 * xmlParseAttValueComplex:
2997 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002998 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * parse a value for an attribute, this is the fallback function
3002 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003003 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003009 xmlChar limit = 0;
3010 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 int len = 0;
3012 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 xmlChar *current = NULL;
3015 xmlEntityPtr ent;
3016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (NXT(0) == '"') {
3018 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3019 limit = '"';
3020 NEXT;
3021 } else if (NXT(0) == '\'') {
3022 limit = '\'';
3023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3024 NEXT;
3025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029
3030 /*
3031 * allocate a translation buffer.
3032 */
3033 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003034 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003036
3037 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003038 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003039 */
3040 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003041 while ((NXT(0) != limit) && /* checked */
3042 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003044 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003045 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (NXT(1) == '#') {
3047 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003050 if (ctxt->replaceEntities) {
3051 if (len > buf_size - 10) {
3052 growBuffer(buf);
3053 }
3054 buf[len++] = '&';
3055 } else {
3056 /*
3057 * The reparsing will be done in xmlStringGetNodeList()
3058 * called by the attribute() function in SAX.c
3059 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003060 if (len > buf_size - 10) {
3061 growBuffer(buf);
3062 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003063 buf[len++] = '&';
3064 buf[len++] = '#';
3065 buf[len++] = '3';
3066 buf[len++] = '8';
3067 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003070 if (len > buf_size - 10) {
3071 growBuffer(buf);
3072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len += xmlCopyChar(0, &buf[len], val);
3074 }
3075 } else {
3076 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003077 if ((ent != NULL) &&
3078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3079 if (len > buf_size - 10) {
3080 growBuffer(buf);
3081 }
3082 if ((ctxt->replaceEntities == 0) &&
3083 (ent->content[0] == '&')) {
3084 buf[len++] = '&';
3085 buf[len++] = '#';
3086 buf[len++] = '3';
3087 buf[len++] = '8';
3088 buf[len++] = ';';
3089 } else {
3090 buf[len++] = ent->content[0];
3091 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 } else if ((ent != NULL) &&
3093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 xmlChar *rep;
3095
3096 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3097 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 XML_SUBSTITUTE_REF,
3099 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (rep != NULL) {
3101 current = rep;
3102 while (*current != 0) { /* non input consuming */
3103 buf[len++] = *current++;
3104 if (len > buf_size - 10) {
3105 growBuffer(buf);
3106 }
3107 }
3108 xmlFree(rep);
3109 }
3110 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (ent->content != NULL)
3115 buf[len++] = ent->content[0];
3116 }
3117 } else if (ent != NULL) {
3118 int i = xmlStrlen(ent->name);
3119 const xmlChar *cur = ent->name;
3120
3121 /*
3122 * This may look absurd but is needed to detect
3123 * entities problems
3124 */
3125 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3126 (ent->content != NULL)) {
3127 xmlChar *rep;
3128 rep = xmlStringDecodeEntities(ctxt, ent->content,
3129 XML_SUBSTITUTE_REF, 0, 0, 0);
3130 if (rep != NULL)
3131 xmlFree(rep);
3132 }
3133
3134 /*
3135 * Just output the reference
3136 */
3137 buf[len++] = '&';
3138 if (len > buf_size - i - 10) {
3139 growBuffer(buf);
3140 }
3141 for (;i > 0;i--)
3142 buf[len++] = *cur++;
3143 buf[len++] = ';';
3144 }
3145 }
3146 } else {
3147 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003148 if ((len != 0) || (!normalize)) {
3149 if ((!normalize) || (!in_space)) {
3150 COPY_BUF(l,buf,len,0x20);
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 }
3155 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 }
3157 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003158 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003159 COPY_BUF(l,buf,len,c);
3160 if (len > buf_size - 10) {
3161 growBuffer(buf);
3162 }
3163 }
3164 NEXTL(l);
3165 }
3166 GROW;
3167 c = CUR_CHAR(l);
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 if ((in_space) && (normalize)) {
3170 while (buf[len - 1] == 0x20) len--;
3171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003172 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003174 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003175 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003176 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3177 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003178 } else
3179 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003180 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003182
3183mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003186}
3187
3188/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003189 * xmlParseAttValue:
3190 * @ctxt: an XML parser context
3191 *
3192 * parse a value for an attribute
3193 * Note: the parser won't do substitution of entities here, this
3194 * will be handled later in xmlStringGetNodeList
3195 *
3196 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3197 * "'" ([^<&'] | Reference)* "'"
3198 *
3199 * 3.3.3 Attribute-Value Normalization:
3200 * Before the value of an attribute is passed to the application or
3201 * checked for validity, the XML processor must normalize it as follows:
3202 * - a character reference is processed by appending the referenced
3203 * character to the attribute value
3204 * - an entity reference is processed by recursively processing the
3205 * replacement text of the entity
3206 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3207 * appending #x20 to the normalized value, except that only a single
3208 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3209 * parsed entity or the literal entity value of an internal parsed entity
3210 * - other characters are processed by appending them to the normalized value
3211 * If the declared value is not CDATA, then the XML processor must further
3212 * process the normalized attribute value by discarding any leading and
3213 * trailing space (#x20) characters, and by replacing sequences of space
3214 * (#x20) characters by a single space (#x20) character.
3215 * All attributes for which no declaration has been read should be treated
3216 * by a non-validating parser as if declared CDATA.
3217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
3220
3221
3222xmlChar *
3223xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003224 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003226}
3227
3228/**
Owen Taylor3473f882001-02-23 17:55:21 +00003229 * xmlParseSystemLiteral:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse an XML Literal
3233 *
3234 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3235 *
3236 * Returns the SystemLiteral parsed or NULL
3237 */
3238
3239xmlChar *
3240xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3241 xmlChar *buf = NULL;
3242 int len = 0;
3243 int size = XML_PARSER_BUFFER_SIZE;
3244 int cur, l;
3245 xmlChar stop;
3246 int state = ctxt->instate;
3247 int count = 0;
3248
3249 SHRINK;
3250 if (RAW == '"') {
3251 NEXT;
3252 stop = '"';
3253 } else if (RAW == '\'') {
3254 NEXT;
3255 stop = '\'';
3256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 return(NULL);
3265 }
3266 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3267 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003268 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003269 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003270 xmlChar *tmp;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003273 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3274 if (tmp == NULL) {
3275 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003276 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003277 ctxt->instate = (xmlParserInputState) state;
3278 return(NULL);
3279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 count++;
3283 if (count > 50) {
3284 GROW;
3285 count = 0;
3286 }
3287 COPY_BUF(l,buf,len,cur);
3288 NEXTL(l);
3289 cur = CUR_CHAR(l);
3290 if (cur == 0) {
3291 GROW;
3292 SHRINK;
3293 cur = CUR_CHAR(l);
3294 }
3295 }
3296 buf[len] = 0;
3297 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003298 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 } else {
3301 NEXT;
3302 }
3303 return(buf);
3304}
3305
3306/**
3307 * xmlParsePubidLiteral:
3308 * @ctxt: an XML parser context
3309 *
3310 * parse an XML public literal
3311 *
3312 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3313 *
3314 * Returns the PubidLiteral parsed or NULL.
3315 */
3316
3317xmlChar *
3318xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3319 xmlChar *buf = NULL;
3320 int len = 0;
3321 int size = XML_PARSER_BUFFER_SIZE;
3322 xmlChar cur;
3323 xmlChar stop;
3324 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003325 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003326
3327 SHRINK;
3328 if (RAW == '"') {
3329 NEXT;
3330 stop = '"';
3331 } else if (RAW == '\'') {
3332 NEXT;
3333 stop = '\'';
3334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 return(NULL);
3337 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003338 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 return(NULL);
3342 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003343 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003345 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlChar *tmp;
3348
Owen Taylor3473f882001-02-23 17:55:21 +00003349 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003352 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return(NULL);
3355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 buf[len++] = cur;
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 NEXT;
3365 cur = CUR;
3366 if (cur == 0) {
3367 GROW;
3368 SHRINK;
3369 cur = CUR;
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 } else {
3376 NEXT;
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(buf);
3380}
3381
Daniel Veillard48b2f892001-02-25 16:11:03 +00003382void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003383
/*
 * Byte classification table used by the inner scanning loop of the
 * character data parser. An entry is non-zero (equal to its own index)
 * when the byte can be skipped over by that loop, and 0 otherwise:
 *   - 0x09 and 0x20..0x7F are accepted, except '&', '<' and ']';
 *   - 0x0A and 0x0D (line ends), the other control bytes and every
 *     byte >= 0x80 (non-ascii) are rejected.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3421
Owen Taylor3473f882001-02-23 17:55:21 +00003422/**
3423 * xmlParseCharData:
3424 * @ctxt: an XML parser context
3425 * @cdata: int indicating whether we are within a CDATA section
3426 *
3427 * parse a CharData section.
3428 * if we are within a CDATA section ']]>' marks an end of section.
3429 *
3430 * The right angle bracket (>) may be represented using the string "&gt;",
3431 * and must, for compatibility, be escaped using "&gt;" or a character
3432 * reference when it appears in the string "]]>" in content, when that
3433 * string is not marking the end of a CDATA section.
3434 *
3435 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3436 */
3437
3438void
3439xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003440 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003442 int line = ctxt->input->line;
3443 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003444 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445
3446 SHRINK;
3447 GROW;
3448 /*
3449 * Accelerated common case where input don't need to be
3450 * modified before passing it to the handler.
3451 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003452 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in = ctxt->input->cur;
3454 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003455get_more_space:
3456 while (*in == 0x20) in++;
3457 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003458 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003459 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003460 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003461 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003462 goto get_more_space;
3463 }
3464 if (*in == '<') {
3465 nbchar = in - ctxt->input->cur;
3466 if (nbchar > 0) {
3467 const xmlChar *tmp = ctxt->input->cur;
3468 ctxt->input->cur = in;
3469
Daniel Veillard34099b42004-11-04 17:34:35 +00003470 if ((ctxt->sax != NULL) &&
3471 (ctxt->sax->ignorableWhitespace !=
3472 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003473 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003474 if (ctxt->sax->ignorableWhitespace != NULL)
3475 ctxt->sax->ignorableWhitespace(ctxt->userData,
3476 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003477 } else if (ctxt->sax->characters != NULL)
3478 ctxt->sax->characters(ctxt->userData,
3479 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003480 } else if ((ctxt->sax != NULL) &&
3481 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003482 ctxt->sax->characters(ctxt->userData,
3483 tmp, nbchar);
3484 }
3485 }
3486 return;
3487 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003488
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003489get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003490 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003491 while (test_char_data[*in]) {
3492 in++;
3493 ccol++;
3494 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003495 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003496 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003497 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003498 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003499 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003500 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003501 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003502 }
3503 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003504 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003505 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003506 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003507 return;
3508 }
3509 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003510 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003511 goto get_more;
3512 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003513 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003514 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003515 if ((ctxt->sax != NULL) &&
3516 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003517 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003518 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003519 const xmlChar *tmp = ctxt->input->cur;
3520 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003521
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003522 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003523 if (ctxt->sax->ignorableWhitespace != NULL)
3524 ctxt->sax->ignorableWhitespace(ctxt->userData,
3525 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003526 } else if (ctxt->sax->characters != NULL)
3527 ctxt->sax->characters(ctxt->userData,
3528 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003529 line = ctxt->input->line;
3530 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003531 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003532 if (ctxt->sax->characters != NULL)
3533 ctxt->sax->characters(ctxt->userData,
3534 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003535 line = ctxt->input->line;
3536 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003537 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003538 }
3539 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003540 if (*in == 0xD) {
3541 in++;
William M. Brackdc904f12005-10-22 02:04:26 +00003542 if (!*in) /* if end of current chunk return */
3543 return;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003544 if (*in == 0xA) {
3545 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003546 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003547 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003548 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003549 }
William M. Brackf4caa5e2005-10-20 09:04:05 +00003550 if (!*in) /* if end of current chunk return */
3551 return;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003552 in--;
3553 }
3554 if (*in == '<') {
3555 return;
3556 }
3557 if (*in == '&') {
3558 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003559 }
3560 SHRINK;
3561 GROW;
3562 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003563 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003564 nbchar = 0;
3565 }
Daniel Veillard50582112001-03-26 22:52:16 +00003566 ctxt->input->line = line;
3567 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003568 xmlParseCharDataComplex(ctxt, cdata);
3569}
3570
Daniel Veillard01c13b52002-12-10 15:19:08 +00003571/**
3572 * xmlParseCharDataComplex:
3573 * @ctxt: an XML parser context
3574 * @cdata: int indicating whether we are within a CDATA section
3575 *
3576 * parse a CharData section.this is the fallback function
3577 * of xmlParseCharData() when the parsing requires handling
3578 * of non-ASCII characters.
3579 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003580void
3581xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003582 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3583 int nbchar = 0;
3584 int cur, l;
3585 int count = 0;
3586
3587 SHRINK;
3588 GROW;
3589 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003590 while ((cur != '<') && /* checked */
3591 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003592 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003593 if ((cur == ']') && (NXT(1) == ']') &&
3594 (NXT(2) == '>')) {
3595 if (cdata) break;
3596 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003597 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003598 }
3599 }
3600 COPY_BUF(l,buf,nbchar,cur);
3601 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003602 buf[nbchar] = 0;
3603
Owen Taylor3473f882001-02-23 17:55:21 +00003604 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003605 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003606 */
3607 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003608 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003609 if (ctxt->sax->ignorableWhitespace != NULL)
3610 ctxt->sax->ignorableWhitespace(ctxt->userData,
3611 buf, nbchar);
3612 } else {
3613 if (ctxt->sax->characters != NULL)
3614 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3615 }
3616 }
3617 nbchar = 0;
3618 }
3619 count++;
3620 if (count > 50) {
3621 GROW;
3622 count = 0;
3623 }
3624 NEXTL(l);
3625 cur = CUR_CHAR(l);
3626 }
3627 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003628 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003629 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003630 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003631 */
3632 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003633 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003634 if (ctxt->sax->ignorableWhitespace != NULL)
3635 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3636 } else {
3637 if (ctxt->sax->characters != NULL)
3638 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3639 }
3640 }
3641 }
3642}
3643
3644/**
3645 * xmlParseExternalID:
3646 * @ctxt: an XML parser context
3647 * @publicID: a xmlChar** receiving PubidLiteral
3648 * @strict: indicate whether we should restrict parsing to only
3649 * production [75], see NOTE below
3650 *
3651 * Parse an External ID or a Public ID
3652 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003653 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003654 * 'PUBLIC' S PubidLiteral S SystemLiteral
3655 *
3656 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3657 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3658 *
3659 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3660 *
3661 * Returns the function returns SystemLiteral and in the second
3662 * case publicID receives PubidLiteral, is strict is off
3663 * it is possible to return NULL and have publicID set.
3664 */
3665
3666xmlChar *
3667xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3668 xmlChar *URI = NULL;
3669
3670 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003671
3672 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003673 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003674 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003675 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003676 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3677 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003678 }
3679 SKIP_BLANKS;
3680 URI = xmlParseSystemLiteral(ctxt);
3681 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003682 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003683 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003684 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003685 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003686 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003688 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003689 }
3690 SKIP_BLANKS;
3691 *publicID = xmlParsePubidLiteral(ctxt);
3692 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003693 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003694 }
3695 if (strict) {
3696 /*
3697 * We don't handle [83] so "S SystemLiteral" is required.
3698 */
William M. Brack76e95df2003-10-18 16:20:14 +00003699 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003701 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003702 }
3703 } else {
3704 /*
3705 * We handle [83] so we return immediately, if
3706 * "S SystemLiteral" is not detected. From a purely parsing
3707 * point of view that's a nice mess.
3708 */
3709 const xmlChar *ptr;
3710 GROW;
3711
3712 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003713 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003714
William M. Brack76e95df2003-10-18 16:20:14 +00003715 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003716 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3717 }
3718 SKIP_BLANKS;
3719 URI = xmlParseSystemLiteral(ctxt);
3720 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003721 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723 }
3724 return(URI);
3725}
3726
3727/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003728 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003729 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003730 * @buf: the already parsed part of the buffer
3731 * @len: number of bytes filles in the buffer
3732 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003733 *
3734 * Skip an XML (SGML) comment <!-- .... -->
3735 * The spec says that "For compatibility, the string "--" (double-hyphen)
3736 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003737 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003738 *
3739 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3740 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003741static void
3742xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003743 int q, ql;
3744 int r, rl;
3745 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003746 xmlParserInputPtr input = ctxt->input;
3747 int count = 0;
3748
Owen Taylor3473f882001-02-23 17:55:21 +00003749 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003750 len = 0;
3751 size = XML_PARSER_BUFFER_SIZE;
3752 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3753 if (buf == NULL) {
3754 xmlErrMemory(ctxt, NULL);
3755 return;
3756 }
Owen Taylor3473f882001-02-23 17:55:21 +00003757 }
3758 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003759 if (q == 0)
3760 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 NEXTL(ql);
3762 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003763 if (r == 0)
3764 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003765 NEXTL(rl);
3766 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003767 if (cur == 0)
3768 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003769 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003770 ((cur != '>') ||
3771 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003772 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003773 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003774 }
3775 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003776 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003777 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003778 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3779 if (new_buf == NULL) {
3780 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 return;
3783 }
William M. Bracka3215c72004-07-31 16:24:01 +00003784 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003785 }
3786 COPY_BUF(ql,buf,len,q);
3787 q = r;
3788 ql = rl;
3789 r = cur;
3790 rl = l;
3791
3792 count++;
3793 if (count > 50) {
3794 GROW;
3795 count = 0;
3796 }
3797 NEXTL(l);
3798 cur = CUR_CHAR(l);
3799 if (cur == 0) {
3800 SHRINK;
3801 GROW;
3802 cur = CUR_CHAR(l);
3803 }
3804 }
3805 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003806 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003807 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003808 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 xmlFree(buf);
3810 } else {
3811 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003812 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3813 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 NEXT;
3816 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3817 (!ctxt->disableSAX))
3818 ctxt->sax->comment(ctxt->userData, buf);
3819 xmlFree(buf);
3820 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003821 return;
3822not_terminated:
3823 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3824 "Comment not terminated\n", NULL);
3825 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003826}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003827/**
3828 * xmlParseComment:
3829 * @ctxt: an XML parser context
3830 *
3831 * Skip an XML (SGML) comment <!-- .... -->
3832 * The spec says that "For compatibility, the string "--" (double-hyphen)
3833 * must not occur within comments. "
3834 *
3835 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3836 */
3837void
3838xmlParseComment(xmlParserCtxtPtr ctxt) {
3839 xmlChar *buf = NULL;
3840 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003841 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003842 xmlParserInputState state;
3843 const xmlChar *in;
3844 int nbchar = 0, ccol;
3845
3846 /*
3847 * Check that there is a comment right here.
3848 */
3849 if ((RAW != '<') || (NXT(1) != '!') ||
3850 (NXT(2) != '-') || (NXT(3) != '-')) return;
3851
3852 state = ctxt->instate;
3853 ctxt->instate = XML_PARSER_COMMENT;
3854 SKIP(4);
3855 SHRINK;
3856 GROW;
3857
3858 /*
3859 * Accelerated common case where input don't need to be
3860 * modified before passing it to the handler.
3861 */
3862 in = ctxt->input->cur;
3863 do {
3864 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003865 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003866 ctxt->input->line++; ctxt->input->col = 1;
3867 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003868 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003869 }
3870get_more:
3871 ccol = ctxt->input->col;
3872 while (((*in > '-') && (*in <= 0x7F)) ||
3873 ((*in >= 0x20) && (*in < '-')) ||
3874 (*in == 0x09)) {
3875 in++;
3876 ccol++;
3877 }
3878 ctxt->input->col = ccol;
3879 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003880 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003881 ctxt->input->line++; ctxt->input->col = 1;
3882 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003883 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003884 goto get_more;
3885 }
3886 nbchar = in - ctxt->input->cur;
3887 /*
3888 * save current set of data
3889 */
3890 if (nbchar > 0) {
3891 if ((ctxt->sax != NULL) &&
3892 (ctxt->sax->comment != NULL)) {
3893 if (buf == NULL) {
3894 if ((*in == '-') && (in[1] == '-'))
3895 size = nbchar + 1;
3896 else
3897 size = XML_PARSER_BUFFER_SIZE + nbchar;
3898 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3899 if (buf == NULL) {
3900 xmlErrMemory(ctxt, NULL);
3901 ctxt->instate = state;
3902 return;
3903 }
3904 len = 0;
3905 } else if (len + nbchar + 1 >= size) {
3906 xmlChar *new_buf;
3907 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3908 new_buf = (xmlChar *) xmlRealloc(buf,
3909 size * sizeof(xmlChar));
3910 if (new_buf == NULL) {
3911 xmlFree (buf);
3912 xmlErrMemory(ctxt, NULL);
3913 ctxt->instate = state;
3914 return;
3915 }
3916 buf = new_buf;
3917 }
3918 memcpy(&buf[len], ctxt->input->cur, nbchar);
3919 len += nbchar;
3920 buf[len] = 0;
3921 }
3922 }
3923 ctxt->input->cur = in;
3924 if (*in == 0xA)
3925
3926 if (*in == 0xD) {
3927 in++;
3928 if (*in == 0xA) {
3929 ctxt->input->cur = in;
3930 in++;
3931 ctxt->input->line++; ctxt->input->col = 1;
3932 continue; /* while */
3933 }
William M. Brackdc904f12005-10-22 02:04:26 +00003934 if (!*in) /* if end of current chunk return */
3935 return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003936 in--;
3937 }
3938 SHRINK;
3939 GROW;
3940 in = ctxt->input->cur;
3941 if (*in == '-') {
3942 if (in[1] == '-') {
3943 if (in[2] == '>') {
3944 SKIP(3);
3945 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3946 (!ctxt->disableSAX)) {
3947 if (buf != NULL)
3948 ctxt->sax->comment(ctxt->userData, buf);
3949 else
3950 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3951 }
3952 if (buf != NULL)
3953 xmlFree(buf);
3954 ctxt->instate = state;
3955 return;
3956 }
3957 if (buf != NULL)
3958 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3959 "Comment not terminated \n<!--%.50s\n",
3960 buf);
3961 else
3962 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3963 "Comment not terminated \n", NULL);
3964 in++;
3965 ctxt->input->col++;
3966 }
3967 in++;
3968 ctxt->input->col++;
3969 goto get_more;
3970 }
3971 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3972 xmlParseCommentComplex(ctxt, buf, len, size);
3973 ctxt->instate = state;
3974 return;
3975}
3976
Owen Taylor3473f882001-02-23 17:55:21 +00003977
3978/**
3979 * xmlParsePITarget:
3980 * @ctxt: an XML parser context
3981 *
3982 * parse the name of a PI
3983 *
3984 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3985 *
3986 * Returns the PITarget name or NULL
3987 */
3988
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003989const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003990xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003991 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003992
3993 name = xmlParseName(ctxt);
3994 if ((name != NULL) &&
3995 ((name[0] == 'x') || (name[0] == 'X')) &&
3996 ((name[1] == 'm') || (name[1] == 'M')) &&
3997 ((name[2] == 'l') || (name[2] == 'L'))) {
3998 int i;
3999 if ((name[0] == 'x') && (name[1] == 'm') &&
4000 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004001 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004002 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004003 return(name);
4004 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004005 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004006 return(name);
4007 }
4008 for (i = 0;;i++) {
4009 if (xmlW3CPIs[i] == NULL) break;
4010 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4011 return(name);
4012 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004013 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4014 "xmlParsePITarget: invalid name prefix 'xml'\n",
4015 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004016 }
4017 return(name);
4018}
4019
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004020#ifdef LIBXML_CATALOG_ENABLED
4021/**
4022 * xmlParseCatalogPI:
4023 * @ctxt: an XML parser context
4024 * @catalog: the PI value string
4025 *
4026 * parse an XML Catalog Processing Instruction.
4027 *
4028 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4029 *
4030 * Occurs only if allowed by the user and if happening in the Misc
4031 * part of the document before any doctype informations
4032 * This will add the given catalog to the parsing context in order
4033 * to be used if there is a resolution need further down in the document
4034 */
4035
4036static void
4037xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4038 xmlChar *URL = NULL;
4039 const xmlChar *tmp, *base;
4040 xmlChar marker;
4041
4042 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004043 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004044 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4045 goto error;
4046 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004047 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004048 if (*tmp != '=') {
4049 return;
4050 }
4051 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004052 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004053 marker = *tmp;
4054 if ((marker != '\'') && (marker != '"'))
4055 goto error;
4056 tmp++;
4057 base = tmp;
4058 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4059 if (*tmp == 0)
4060 goto error;
4061 URL = xmlStrndup(base, tmp - base);
4062 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004063 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004064 if (*tmp != 0)
4065 goto error;
4066
4067 if (URL != NULL) {
4068 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4069 xmlFree(URL);
4070 }
4071 return;
4072
4073error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004074 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4075 "Catalog PI syntax error: %s\n",
4076 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004077 if (URL != NULL)
4078 xmlFree(URL);
4079}
4080#endif
4081
Owen Taylor3473f882001-02-23 17:55:21 +00004082/**
4083 * xmlParsePI:
4084 * @ctxt: an XML parser context
4085 *
4086 * parse an XML Processing Instruction.
4087 *
4088 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4089 *
4090 * The processing is transfered to SAX once parsed.
4091 */
4092
4093void
4094xmlParsePI(xmlParserCtxtPtr ctxt) {
4095 xmlChar *buf = NULL;
4096 int len = 0;
4097 int size = XML_PARSER_BUFFER_SIZE;
4098 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004099 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004100 xmlParserInputState state;
4101 int count = 0;
4102
4103 if ((RAW == '<') && (NXT(1) == '?')) {
4104 xmlParserInputPtr input = ctxt->input;
4105 state = ctxt->instate;
4106 ctxt->instate = XML_PARSER_PI;
4107 /*
4108 * this is a Processing Instruction.
4109 */
4110 SKIP(2);
4111 SHRINK;
4112
4113 /*
4114 * Parse the target name and check for special support like
4115 * namespace.
4116 */
4117 target = xmlParsePITarget(ctxt);
4118 if (target != NULL) {
4119 if ((RAW == '?') && (NXT(1) == '>')) {
4120 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004121 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4122 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004123 }
4124 SKIP(2);
4125
4126 /*
4127 * SAX: PI detected.
4128 */
4129 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4130 (ctxt->sax->processingInstruction != NULL))
4131 ctxt->sax->processingInstruction(ctxt->userData,
4132 target, NULL);
4133 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004134 return;
4135 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004137 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004138 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 ctxt->instate = state;
4140 return;
4141 }
4142 cur = CUR;
4143 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004144 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4145 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004146 }
4147 SKIP_BLANKS;
4148 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004149 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004150 ((cur != '?') || (NXT(1) != '>'))) {
4151 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004152 xmlChar *tmp;
4153
Owen Taylor3473f882001-02-23 17:55:21 +00004154 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004155 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4156 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004157 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004158 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004159 ctxt->instate = state;
4160 return;
4161 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004162 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004163 }
4164 count++;
4165 if (count > 50) {
4166 GROW;
4167 count = 0;
4168 }
4169 COPY_BUF(l,buf,len,cur);
4170 NEXTL(l);
4171 cur = CUR_CHAR(l);
4172 if (cur == 0) {
4173 SHRINK;
4174 GROW;
4175 cur = CUR_CHAR(l);
4176 }
4177 }
4178 buf[len] = 0;
4179 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004180 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4181 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004182 } else {
4183 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004184 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4185 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004186 }
4187 SKIP(2);
4188
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004189#ifdef LIBXML_CATALOG_ENABLED
4190 if (((state == XML_PARSER_MISC) ||
4191 (state == XML_PARSER_START)) &&
4192 (xmlStrEqual(target, XML_CATALOG_PI))) {
4193 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4194 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4195 (allow == XML_CATA_ALLOW_ALL))
4196 xmlParseCatalogPI(ctxt, buf);
4197 }
4198#endif
4199
4200
Owen Taylor3473f882001-02-23 17:55:21 +00004201 /*
4202 * SAX: PI detected.
4203 */
4204 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4205 (ctxt->sax->processingInstruction != NULL))
4206 ctxt->sax->processingInstruction(ctxt->userData,
4207 target, buf);
4208 }
4209 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004210 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 }
4213 ctxt->instate = state;
4214 }
4215}
4216
4217/**
4218 * xmlParseNotationDecl:
4219 * @ctxt: an XML parser context
4220 *
4221 * parse a notation declaration
4222 *
4223 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4224 *
4225 * Hence there is actually 3 choices:
4226 * 'PUBLIC' S PubidLiteral
4227 * 'PUBLIC' S PubidLiteral S SystemLiteral
4228 * and 'SYSTEM' S SystemLiteral
4229 *
4230 * See the NOTE on xmlParseExternalID().
4231 */
4232
4233void
4234xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004235 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004236 xmlChar *Pubid;
4237 xmlChar *Systemid;
4238
Daniel Veillarda07050d2003-10-19 14:46:32 +00004239 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004240 xmlParserInputPtr input = ctxt->input;
4241 SHRINK;
4242 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004243 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004244 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4245 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004246 return;
4247 }
4248 SKIP_BLANKS;
4249
Daniel Veillard76d66f42001-05-16 21:05:17 +00004250 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004251 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004252 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004253 return;
4254 }
William M. Brack76e95df2003-10-18 16:20:14 +00004255 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004256 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004257 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004258 return;
4259 }
4260 SKIP_BLANKS;
4261
4262 /*
4263 * Parse the IDs.
4264 */
4265 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4266 SKIP_BLANKS;
4267
4268 if (RAW == '>') {
4269 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004270 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4271 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004272 }
4273 NEXT;
4274 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4275 (ctxt->sax->notationDecl != NULL))
4276 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4277 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004278 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004279 }
Owen Taylor3473f882001-02-23 17:55:21 +00004280 if (Systemid != NULL) xmlFree(Systemid);
4281 if (Pubid != NULL) xmlFree(Pubid);
4282 }
4283}
4284
4285/**
4286 * xmlParseEntityDecl:
4287 * @ctxt: an XML parser context
4288 *
4289 * parse <!ENTITY declarations
4290 *
4291 * [70] EntityDecl ::= GEDecl | PEDecl
4292 *
4293 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4294 *
4295 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4296 *
4297 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4298 *
4299 * [74] PEDef ::= EntityValue | ExternalID
4300 *
4301 * [76] NDataDecl ::= S 'NDATA' S Name
4302 *
4303 * [ VC: Notation Declared ]
4304 * The Name must match the declared name of a notation.
4305 */
4306
4307void
4308xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004309 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004310 xmlChar *value = NULL;
4311 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004312 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004313 int isParameter = 0;
4314 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004315 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004316
Daniel Veillard4c778d82005-01-23 17:37:44 +00004317 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004318 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004319 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 SHRINK;
4321 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004322 skipped = SKIP_BLANKS;
4323 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004324 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4325 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004326 }
Owen Taylor3473f882001-02-23 17:55:21 +00004327
4328 if (RAW == '%') {
4329 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004330 skipped = SKIP_BLANKS;
4331 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004332 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4333 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
Owen Taylor3473f882001-02-23 17:55:21 +00004335 isParameter = 1;
4336 }
4337
Daniel Veillard76d66f42001-05-16 21:05:17 +00004338 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004339 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4341 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 return;
4343 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004344 skipped = SKIP_BLANKS;
4345 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4347 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
Owen Taylor3473f882001-02-23 17:55:21 +00004349
Daniel Veillardf5582f12002-06-11 10:08:16 +00004350 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004351 /*
4352 * handle the various case of definitions...
4353 */
4354 if (isParameter) {
4355 if ((RAW == '"') || (RAW == '\'')) {
4356 value = xmlParseEntityValue(ctxt, &orig);
4357 if (value) {
4358 if ((ctxt->sax != NULL) &&
4359 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4360 ctxt->sax->entityDecl(ctxt->userData, name,
4361 XML_INTERNAL_PARAMETER_ENTITY,
4362 NULL, NULL, value);
4363 }
4364 } else {
4365 URI = xmlParseExternalID(ctxt, &literal, 1);
4366 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004367 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004368 }
4369 if (URI) {
4370 xmlURIPtr uri;
4371
4372 uri = xmlParseURI((const char *) URI);
4373 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004374 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4375 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004376 /*
4377 * This really ought to be a well formedness error
4378 * but the XML Core WG decided otherwise c.f. issue
4379 * E26 of the XML erratas.
4380 */
Owen Taylor3473f882001-02-23 17:55:21 +00004381 } else {
4382 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004383 /*
4384 * Okay this is foolish to block those but not
4385 * invalid URIs.
4386 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004387 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004388 } else {
4389 if ((ctxt->sax != NULL) &&
4390 (!ctxt->disableSAX) &&
4391 (ctxt->sax->entityDecl != NULL))
4392 ctxt->sax->entityDecl(ctxt->userData, name,
4393 XML_EXTERNAL_PARAMETER_ENTITY,
4394 literal, URI, NULL);
4395 }
4396 xmlFreeURI(uri);
4397 }
4398 }
4399 }
4400 } else {
4401 if ((RAW == '"') || (RAW == '\'')) {
4402 value = xmlParseEntityValue(ctxt, &orig);
4403 if ((ctxt->sax != NULL) &&
4404 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4405 ctxt->sax->entityDecl(ctxt->userData, name,
4406 XML_INTERNAL_GENERAL_ENTITY,
4407 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004408 /*
4409 * For expat compatibility in SAX mode.
4410 */
4411 if ((ctxt->myDoc == NULL) ||
4412 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4413 if (ctxt->myDoc == NULL) {
4414 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4415 }
4416 if (ctxt->myDoc->intSubset == NULL)
4417 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4418 BAD_CAST "fake", NULL, NULL);
4419
Daniel Veillard1af9a412003-08-20 22:54:39 +00004420 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4421 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004422 }
Owen Taylor3473f882001-02-23 17:55:21 +00004423 } else {
4424 URI = xmlParseExternalID(ctxt, &literal, 1);
4425 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004426 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004427 }
4428 if (URI) {
4429 xmlURIPtr uri;
4430
4431 uri = xmlParseURI((const char *)URI);
4432 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004433 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4434 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004435 /*
4436 * This really ought to be a well formedness error
4437 * but the XML Core WG decided otherwise c.f. issue
4438 * E26 of the XML erratas.
4439 */
Owen Taylor3473f882001-02-23 17:55:21 +00004440 } else {
4441 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004442 /*
4443 * Okay this is foolish to block those but not
4444 * invalid URIs.
4445 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004446 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004447 }
4448 xmlFreeURI(uri);
4449 }
4450 }
William M. Brack76e95df2003-10-18 16:20:14 +00004451 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4453 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004454 }
4455 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004456 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004457 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004458 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004459 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4460 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004461 }
4462 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004463 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004464 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4465 (ctxt->sax->unparsedEntityDecl != NULL))
4466 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4467 literal, URI, ndata);
4468 } else {
4469 if ((ctxt->sax != NULL) &&
4470 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4471 ctxt->sax->entityDecl(ctxt->userData, name,
4472 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4473 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004474 /*
4475 * For expat compatibility in SAX mode.
4476 * assuming the entity repalcement was asked for
4477 */
4478 if ((ctxt->replaceEntities != 0) &&
4479 ((ctxt->myDoc == NULL) ||
4480 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4481 if (ctxt->myDoc == NULL) {
4482 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4483 }
4484
4485 if (ctxt->myDoc->intSubset == NULL)
4486 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4487 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004488 xmlSAX2EntityDecl(ctxt, name,
4489 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4490 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004491 }
Owen Taylor3473f882001-02-23 17:55:21 +00004492 }
4493 }
4494 }
4495 SKIP_BLANKS;
4496 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004497 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004498 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004499 } else {
4500 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004501 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4502 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004503 }
4504 NEXT;
4505 }
4506 if (orig != NULL) {
4507 /*
4508 * Ugly mechanism to save the raw entity value.
4509 */
4510 xmlEntityPtr cur = NULL;
4511
4512 if (isParameter) {
4513 if ((ctxt->sax != NULL) &&
4514 (ctxt->sax->getParameterEntity != NULL))
4515 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4516 } else {
4517 if ((ctxt->sax != NULL) &&
4518 (ctxt->sax->getEntity != NULL))
4519 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004520 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004521 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004522 }
Owen Taylor3473f882001-02-23 17:55:21 +00004523 }
4524 if (cur != NULL) {
4525 if (cur->orig != NULL)
4526 xmlFree(orig);
4527 else
4528 cur->orig = orig;
4529 } else
4530 xmlFree(orig);
4531 }
Owen Taylor3473f882001-02-23 17:55:21 +00004532 if (value != NULL) xmlFree(value);
4533 if (URI != NULL) xmlFree(URI);
4534 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004535 }
4536}
4537
4538/**
4539 * xmlParseDefaultDecl:
4540 * @ctxt: an XML parser context
4541 * @value: Receive a possible fixed default value for the attribute
4542 *
4543 * Parse an attribute default declaration
4544 *
4545 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4546 *
4547 * [ VC: Required Attribute ]
4548 * if the default declaration is the keyword #REQUIRED, then the
4549 * attribute must be specified for all elements of the type in the
4550 * attribute-list declaration.
4551 *
4552 * [ VC: Attribute Default Legal ]
4553 * The declared default value must meet the lexical constraints of
4554 * the declared attribute type c.f. xmlValidateAttributeDecl()
4555 *
4556 * [ VC: Fixed Attribute Default ]
4557 * if an attribute has a default value declared with the #FIXED
4558 * keyword, instances of that attribute must match the default value.
4559 *
4560 * [ WFC: No < in Attribute Values ]
4561 * handled in xmlParseAttValue()
4562 *
4563 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4564 * or XML_ATTRIBUTE_FIXED.
4565 */
4566
4567int
4568xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4569 int val;
4570 xmlChar *ret;
4571
4572 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004573 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004574 SKIP(9);
4575 return(XML_ATTRIBUTE_REQUIRED);
4576 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004577 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004578 SKIP(8);
4579 return(XML_ATTRIBUTE_IMPLIED);
4580 }
4581 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004582 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004583 SKIP(6);
4584 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004585 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004586 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4587 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004588 }
4589 SKIP_BLANKS;
4590 }
4591 ret = xmlParseAttValue(ctxt);
4592 ctxt->instate = XML_PARSER_DTD;
4593 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004594 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004595 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004596 } else
4597 *value = ret;
4598 return(val);
4599}
4600
4601/**
4602 * xmlParseNotationType:
4603 * @ctxt: an XML parser context
4604 *
4605 * parse an Notation attribute type.
4606 *
4607 * Note: the leading 'NOTATION' S part has already being parsed...
4608 *
4609 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4610 *
4611 * [ VC: Notation Attributes ]
4612 * Values of this type must match one of the notation names included
4613 * in the declaration; all notation names in the declaration must be declared.
4614 *
4615 * Returns: the notation attribute tree built while parsing
4616 */
4617
4618xmlEnumerationPtr
4619xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004620 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004621 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4622
4623 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004624 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004625 return(NULL);
4626 }
4627 SHRINK;
4628 do {
4629 NEXT;
4630 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004631 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004633 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4634 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004635 return(ret);
4636 }
4637 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004638 if (cur == NULL) return(ret);
4639 if (last == NULL) ret = last = cur;
4640 else {
4641 last->next = cur;
4642 last = cur;
4643 }
4644 SKIP_BLANKS;
4645 } while (RAW == '|');
4646 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004647 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 if ((last != NULL) && (last != ret))
4649 xmlFreeEnumeration(last);
4650 return(ret);
4651 }
4652 NEXT;
4653 return(ret);
4654}
4655
4656/**
4657 * xmlParseEnumerationType:
4658 * @ctxt: an XML parser context
4659 *
4660 * parse an Enumeration attribute type.
4661 *
4662 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4663 *
4664 * [ VC: Enumeration ]
4665 * Values of this type must match one of the Nmtoken tokens in
4666 * the declaration
4667 *
4668 * Returns: the enumeration attribute tree built while parsing
4669 */
4670
4671xmlEnumerationPtr
4672xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4673 xmlChar *name;
4674 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4675
4676 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004677 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 return(NULL);
4679 }
4680 SHRINK;
4681 do {
4682 NEXT;
4683 SKIP_BLANKS;
4684 name = xmlParseNmtoken(ctxt);
4685 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004686 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004687 return(ret);
4688 }
4689 cur = xmlCreateEnumeration(name);
4690 xmlFree(name);
4691 if (cur == NULL) return(ret);
4692 if (last == NULL) ret = last = cur;
4693 else {
4694 last->next = cur;
4695 last = cur;
4696 }
4697 SKIP_BLANKS;
4698 } while (RAW == '|');
4699 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 return(ret);
4702 }
4703 NEXT;
4704 return(ret);
4705}
4706
4707/**
4708 * xmlParseEnumeratedType:
4709 * @ctxt: an XML parser context
4710 * @tree: the enumeration tree built while parsing
4711 *
4712 * parse an Enumerated attribute type.
4713 *
4714 * [57] EnumeratedType ::= NotationType | Enumeration
4715 *
4716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4717 *
4718 *
4719 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4720 */
4721
4722int
4723xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004724 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004725 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4728 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004729 return(0);
4730 }
4731 SKIP_BLANKS;
4732 *tree = xmlParseNotationType(ctxt);
4733 if (*tree == NULL) return(0);
4734 return(XML_ATTRIBUTE_NOTATION);
4735 }
4736 *tree = xmlParseEnumerationType(ctxt);
4737 if (*tree == NULL) return(0);
4738 return(XML_ATTRIBUTE_ENUMERATION);
4739}
4740
4741/**
4742 * xmlParseAttributeType:
4743 * @ctxt: an XML parser context
4744 * @tree: the enumeration tree built while parsing
4745 *
4746 * parse the Attribute list def for an element
4747 *
4748 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4749 *
4750 * [55] StringType ::= 'CDATA'
4751 *
4752 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4753 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4754 *
4755 * Validity constraints for attribute values syntax are checked in
4756 * xmlValidateAttributeValue()
4757 *
4758 * [ VC: ID ]
4759 * Values of type ID must match the Name production. A name must not
4760 * appear more than once in an XML document as a value of this type;
4761 * i.e., ID values must uniquely identify the elements which bear them.
4762 *
4763 * [ VC: One ID per Element Type ]
4764 * No element type may have more than one ID attribute specified.
4765 *
4766 * [ VC: ID Attribute Default ]
4767 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4768 *
4769 * [ VC: IDREF ]
4770 * Values of type IDREF must match the Name production, and values
4771 * of type IDREFS must match Names; each IDREF Name must match the value
4772 * of an ID attribute on some element in the XML document; i.e. IDREF
4773 * values must match the value of some ID attribute.
4774 *
4775 * [ VC: Entity Name ]
4776 * Values of type ENTITY must match the Name production, values
4777 * of type ENTITIES must match Names; each Entity Name must match the
4778 * name of an unparsed entity declared in the DTD.
4779 *
4780 * [ VC: Name Token ]
4781 * Values of type NMTOKEN must match the Nmtoken production; values
4782 * of type NMTOKENS must match Nmtokens.
4783 *
4784 * Returns the attribute type
4785 */
4786int
4787xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4788 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004789 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004790 SKIP(5);
4791 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004792 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004793 SKIP(6);
4794 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004795 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004796 SKIP(5);
4797 return(XML_ATTRIBUTE_IDREF);
4798 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4799 SKIP(2);
4800 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004801 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004802 SKIP(6);
4803 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004804 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004805 SKIP(8);
4806 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004807 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004808 SKIP(8);
4809 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004810 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004811 SKIP(7);
4812 return(XML_ATTRIBUTE_NMTOKEN);
4813 }
4814 return(xmlParseEnumeratedType(ctxt, tree));
4815}
4816
4817/**
4818 * xmlParseAttributeListDecl:
4819 * @ctxt: an XML parser context
4820 *
4821 * : parse the Attribute list def for an element
4822 *
4823 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4824 *
4825 * [53] AttDef ::= S Name S AttType S DefaultDecl
4826 *
4827 */
4828void
4829xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004830 const xmlChar *elemName;
4831 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004832 xmlEnumerationPtr tree;
4833
Daniel Veillarda07050d2003-10-19 14:46:32 +00004834 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004835 xmlParserInputPtr input = ctxt->input;
4836
4837 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004838 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004839 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004840 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004841 }
4842 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004843 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004844 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004845 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4846 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004847 return;
4848 }
4849 SKIP_BLANKS;
4850 GROW;
4851 while (RAW != '>') {
4852 const xmlChar *check = CUR_PTR;
4853 int type;
4854 int def;
4855 xmlChar *defaultValue = NULL;
4856
4857 GROW;
4858 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004859 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004860 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004861 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4862 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004863 break;
4864 }
4865 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004866 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004867 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004868 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004869 if (defaultValue != NULL)
4870 xmlFree(defaultValue);
4871 break;
4872 }
4873 SKIP_BLANKS;
4874
4875 type = xmlParseAttributeType(ctxt, &tree);
4876 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004877 if (defaultValue != NULL)
4878 xmlFree(defaultValue);
4879 break;
4880 }
4881
4882 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004883 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004884 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4885 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004886 if (defaultValue != NULL)
4887 xmlFree(defaultValue);
4888 if (tree != NULL)
4889 xmlFreeEnumeration(tree);
4890 break;
4891 }
4892 SKIP_BLANKS;
4893
4894 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4895 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004896 if (defaultValue != NULL)
4897 xmlFree(defaultValue);
4898 if (tree != NULL)
4899 xmlFreeEnumeration(tree);
4900 break;
4901 }
4902
4903 GROW;
4904 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004905 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004906 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004907 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004908 if (defaultValue != NULL)
4909 xmlFree(defaultValue);
4910 if (tree != NULL)
4911 xmlFreeEnumeration(tree);
4912 break;
4913 }
4914 SKIP_BLANKS;
4915 }
4916 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004917 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4918 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004919 if (defaultValue != NULL)
4920 xmlFree(defaultValue);
4921 if (tree != NULL)
4922 xmlFreeEnumeration(tree);
4923 break;
4924 }
4925 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4926 (ctxt->sax->attributeDecl != NULL))
4927 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4928 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004929 else if (tree != NULL)
4930 xmlFreeEnumeration(tree);
4931
4932 if ((ctxt->sax2) && (defaultValue != NULL) &&
4933 (def != XML_ATTRIBUTE_IMPLIED) &&
4934 (def != XML_ATTRIBUTE_REQUIRED)) {
4935 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4936 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004937 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4938 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4939 }
Owen Taylor3473f882001-02-23 17:55:21 +00004940 if (defaultValue != NULL)
4941 xmlFree(defaultValue);
4942 GROW;
4943 }
4944 if (RAW == '>') {
4945 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4947 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 NEXT;
4950 }
Owen Taylor3473f882001-02-23 17:55:21 +00004951 }
4952}
4953
4954/**
4955 * xmlParseElementMixedContentDecl:
4956 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004957 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004958 *
4959 * parse the declaration for a Mixed Element content
4960 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4961 *
4962 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4963 * '(' S? '#PCDATA' S? ')'
4964 *
4965 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4966 *
4967 * [ VC: No Duplicate Types ]
4968 * The same name must not appear more than once in a single
4969 * mixed-content declaration.
4970 *
4971 * returns: the list of the xmlElementContentPtr describing the element choices
4972 */
4973xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004974xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004975 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004976 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004977
4978 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004979 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004980 SKIP(7);
4981 SKIP_BLANKS;
4982 SHRINK;
4983 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004984 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004985 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4986"Element content declaration doesn't start and stop in the same entity\n",
4987 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004988 }
Owen Taylor3473f882001-02-23 17:55:21 +00004989 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004990 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 if (RAW == '*') {
4992 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4993 NEXT;
4994 }
4995 return(ret);
4996 }
4997 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004998 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004999 if (ret == NULL) return(NULL);
5000 }
5001 while (RAW == '|') {
5002 NEXT;
5003 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005004 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005005 if (ret == NULL) return(NULL);
5006 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005007 if (cur != NULL)
5008 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005009 cur = ret;
5010 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005011 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005012 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005013 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005014 if (n->c1 != NULL)
5015 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005017 if (n != NULL)
5018 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005019 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005020 }
5021 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005022 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005023 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005024 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005025 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005026 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 return(NULL);
5028 }
5029 SKIP_BLANKS;
5030 GROW;
5031 }
5032 if ((RAW == ')') && (NXT(1) == '*')) {
5033 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005034 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005035 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005036 if (cur->c2 != NULL)
5037 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005038 }
5039 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005040 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005041 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5042"Element content declaration doesn't start and stop in the same entity\n",
5043 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005044 }
Owen Taylor3473f882001-02-23 17:55:21 +00005045 SKIP(2);
5046 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005047 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005048 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 return(NULL);
5050 }
5051
5052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005053 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005054 }
5055 return(ret);
5056}
5057
5058/**
5059 * xmlParseElementChildrenContentDecl:
5060 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005061 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005062 *
5063 * parse the declaration for a Mixed Element content
5064 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5065 *
5066 *
5067 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5068 *
5069 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5070 *
5071 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5072 *
5073 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5074 *
5075 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5076 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005077 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005078 * opening or closing parentheses in a choice, seq, or Mixed
5079 * construct is contained in the replacement text for a parameter
5080 * entity, both must be contained in the same replacement text. For
5081 * interoperability, if a parameter-entity reference appears in a
5082 * choice, seq, or Mixed construct, its replacement text should not
5083 * be empty, and neither the first nor last non-blank character of
5084 * the replacement text should be a connector (| or ,).
5085 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005086 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005087 * hierarchy.
5088 */
5089xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005090xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005091 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005092 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005093 xmlChar type = 0;
5094
5095 SKIP_BLANKS;
5096 GROW;
5097 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005098 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005099
Owen Taylor3473f882001-02-23 17:55:21 +00005100 /* Recurse on first child */
5101 NEXT;
5102 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005103 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005104 SKIP_BLANKS;
5105 GROW;
5106 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005107 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005108 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005109 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005110 return(NULL);
5111 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005112 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005113 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005114 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005115 return(NULL);
5116 }
Owen Taylor3473f882001-02-23 17:55:21 +00005117 GROW;
5118 if (RAW == '?') {
5119 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5120 NEXT;
5121 } else if (RAW == '*') {
5122 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5123 NEXT;
5124 } else if (RAW == '+') {
5125 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5126 NEXT;
5127 } else {
5128 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5129 }
Owen Taylor3473f882001-02-23 17:55:21 +00005130 GROW;
5131 }
5132 SKIP_BLANKS;
5133 SHRINK;
5134 while (RAW != ')') {
5135 /*
5136 * Each loop we parse one separator and one element.
5137 */
5138 if (RAW == ',') {
5139 if (type == 0) type = CUR;
5140
5141 /*
5142 * Detect "Name | Name , Name" error
5143 */
5144 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005146 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005147 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005148 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005149 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005151 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005152 return(NULL);
5153 }
5154 NEXT;
5155
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005156 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005158 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005159 xmlFreeDocElementContent(ctxt->myDoc, last);
5160 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005161 return(NULL);
5162 }
5163 if (last == NULL) {
5164 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005165 if (ret != NULL)
5166 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005167 ret = cur = op;
5168 } else {
5169 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005170 if (op != NULL)
5171 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005172 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005173 if (last != NULL)
5174 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005175 cur =op;
5176 last = NULL;
5177 }
5178 } else if (RAW == '|') {
5179 if (type == 0) type = CUR;
5180
5181 /*
5182 * Detect "Name , Name | Name" error
5183 */
5184 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005185 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005186 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005187 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005188 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005189 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005191 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005192 return(NULL);
5193 }
5194 NEXT;
5195
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005196 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005198 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005199 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005201 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005202 return(NULL);
5203 }
5204 if (last == NULL) {
5205 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005206 if (ret != NULL)
5207 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005208 ret = cur = op;
5209 } else {
5210 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005211 if (op != NULL)
5212 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005213 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005214 if (last != NULL)
5215 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005216 cur =op;
5217 last = NULL;
5218 }
5219 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005220 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005222 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005223 return(NULL);
5224 }
5225 GROW;
5226 SKIP_BLANKS;
5227 GROW;
5228 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005229 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005230 /* Recurse on second child */
5231 NEXT;
5232 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005233 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005234 SKIP_BLANKS;
5235 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005236 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005238 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005239 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005240 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 return(NULL);
5242 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005243 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 if (RAW == '?') {
5245 last->ocur = XML_ELEMENT_CONTENT_OPT;
5246 NEXT;
5247 } else if (RAW == '*') {
5248 last->ocur = XML_ELEMENT_CONTENT_MULT;
5249 NEXT;
5250 } else if (RAW == '+') {
5251 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5252 NEXT;
5253 } else {
5254 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5255 }
5256 }
5257 SKIP_BLANKS;
5258 GROW;
5259 }
5260 if ((cur != NULL) && (last != NULL)) {
5261 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005262 if (last != NULL)
5263 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005264 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005265 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005266 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5267"Element content declaration doesn't start and stop in the same entity\n",
5268 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005269 }
Owen Taylor3473f882001-02-23 17:55:21 +00005270 NEXT;
5271 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005272 if (ret != NULL) {
5273 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5274 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5275 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5276 else
5277 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5278 }
Owen Taylor3473f882001-02-23 17:55:21 +00005279 NEXT;
5280 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005281 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005282 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005283 cur = ret;
5284 /*
5285 * Some normalization:
5286 * (a | b* | c?)* == (a | b | c)*
5287 */
5288 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5289 if ((cur->c1 != NULL) &&
5290 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5291 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5292 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5293 if ((cur->c2 != NULL) &&
5294 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5295 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5296 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5297 cur = cur->c2;
5298 }
5299 }
Owen Taylor3473f882001-02-23 17:55:21 +00005300 NEXT;
5301 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005302 if (ret != NULL) {
5303 int found = 0;
5304
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005305 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5306 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5307 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005308 else
5309 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005310 /*
5311 * Some normalization:
5312 * (a | b*)+ == (a | b)*
5313 * (a | b?)+ == (a | b)*
5314 */
5315 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5316 if ((cur->c1 != NULL) &&
5317 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5318 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5319 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5320 found = 1;
5321 }
5322 if ((cur->c2 != NULL) &&
5323 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5324 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5325 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5326 found = 1;
5327 }
5328 cur = cur->c2;
5329 }
5330 if (found)
5331 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5332 }
Owen Taylor3473f882001-02-23 17:55:21 +00005333 NEXT;
5334 }
5335 return(ret);
5336}
5337
5338/**
5339 * xmlParseElementContentDecl:
5340 * @ctxt: an XML parser context
5341 * @name: the name of the element being defined.
5342 * @result: the Element Content pointer will be stored here if any
5343 *
5344 * parse the declaration for an Element content either Mixed or Children,
5345 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5346 *
5347 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5348 *
5349 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5350 */
5351
5352int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005353xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005354 xmlElementContentPtr *result) {
5355
5356 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005358 int res;
5359
5360 *result = NULL;
5361
5362 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005363 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005364 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005365 return(-1);
5366 }
5367 NEXT;
5368 GROW;
5369 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005370 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005371 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005372 res = XML_ELEMENT_TYPE_MIXED;
5373 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005374 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 res = XML_ELEMENT_TYPE_ELEMENT;
5376 }
Owen Taylor3473f882001-02-23 17:55:21 +00005377 SKIP_BLANKS;
5378 *result = tree;
5379 return(res);
5380}
5381
5382/**
5383 * xmlParseElementDecl:
5384 * @ctxt: an XML parser context
5385 *
5386 * parse an Element declaration.
5387 *
5388 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5389 *
5390 * [ VC: Unique Element Type Declaration ]
5391 * No element type may be declared more than once
5392 *
5393 * Returns the type of the element, or -1 in case of error
5394 */
5395int
5396xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005397 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005398 int ret = -1;
5399 xmlElementContentPtr content = NULL;
5400
Daniel Veillard4c778d82005-01-23 17:37:44 +00005401 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005402 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005403 xmlParserInputPtr input = ctxt->input;
5404
5405 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005406 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005407 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5408 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005409 }
5410 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005411 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005413 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5414 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005415 return(-1);
5416 }
5417 while ((RAW == 0) && (ctxt->inputNr > 1))
5418 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005419 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005420 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5421 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005422 }
5423 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005424 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005425 SKIP(5);
5426 /*
5427 * Element must always be empty.
5428 */
5429 ret = XML_ELEMENT_TYPE_EMPTY;
5430 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5431 (NXT(2) == 'Y')) {
5432 SKIP(3);
5433 /*
5434 * Element is a generic container.
5435 */
5436 ret = XML_ELEMENT_TYPE_ANY;
5437 } else if (RAW == '(') {
5438 ret = xmlParseElementContentDecl(ctxt, name, &content);
5439 } else {
5440 /*
5441 * [ WFC: PEs in Internal Subset ] error handling.
5442 */
5443 if ((RAW == '%') && (ctxt->external == 0) &&
5444 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005445 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005446 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005447 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005449 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451 return(-1);
5452 }
5453
5454 SKIP_BLANKS;
5455 /*
5456 * Pop-up of finished entities.
5457 */
5458 while ((RAW == 0) && (ctxt->inputNr > 1))
5459 xmlPopInput(ctxt);
5460 SKIP_BLANKS;
5461
5462 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005463 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005464 if (content != NULL) {
5465 xmlFreeDocElementContent(ctxt->myDoc, content);
5466 }
Owen Taylor3473f882001-02-23 17:55:21 +00005467 } else {
5468 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005469 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5470 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005471 }
5472
5473 NEXT;
5474 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005475 (ctxt->sax->elementDecl != NULL)) {
5476 if (content != NULL)
5477 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005478 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5479 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005480 if ((content != NULL) && (content->parent == NULL)) {
5481 /*
5482 * this is a trick: if xmlAddElementDecl is called,
5483 * instead of copying the full tree it is plugged directly
5484 * if called from the parser. Avoid duplicating the
5485 * interfaces or change the API/ABI
5486 */
5487 xmlFreeDocElementContent(ctxt->myDoc, content);
5488 }
5489 } else if (content != NULL) {
5490 xmlFreeDocElementContent(ctxt->myDoc, content);
5491 }
Owen Taylor3473f882001-02-23 17:55:21 +00005492 }
Owen Taylor3473f882001-02-23 17:55:21 +00005493 }
5494 return(ret);
5495}
5496
5497/**
Owen Taylor3473f882001-02-23 17:55:21 +00005498 * xmlParseConditionalSections
5499 * @ctxt: an XML parser context
5500 *
5501 * [61] conditionalSect ::= includeSect | ignoreSect
5502 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5503 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5504 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5505 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5506 */
5507
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005508static void
Owen Taylor3473f882001-02-23 17:55:21 +00005509xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5510 SKIP(3);
5511 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005512 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005513 SKIP(7);
5514 SKIP_BLANKS;
5515 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005516 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005517 } else {
5518 NEXT;
5519 }
5520 if (xmlParserDebugEntities) {
5521 if ((ctxt->input != NULL) && (ctxt->input->filename))
5522 xmlGenericError(xmlGenericErrorContext,
5523 "%s(%d): ", ctxt->input->filename,
5524 ctxt->input->line);
5525 xmlGenericError(xmlGenericErrorContext,
5526 "Entering INCLUDE Conditional Section\n");
5527 }
5528
5529 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5530 (NXT(2) != '>'))) {
5531 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005532 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005533
5534 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5535 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005536 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005537 NEXT;
5538 } else if (RAW == '%') {
5539 xmlParsePEReference(ctxt);
5540 } else
5541 xmlParseMarkupDecl(ctxt);
5542
5543 /*
5544 * Pop-up of finished entities.
5545 */
5546 while ((RAW == 0) && (ctxt->inputNr > 1))
5547 xmlPopInput(ctxt);
5548
Daniel Veillardfdc91562002-07-01 21:52:03 +00005549 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005550 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005551 break;
5552 }
5553 }
5554 if (xmlParserDebugEntities) {
5555 if ((ctxt->input != NULL) && (ctxt->input->filename))
5556 xmlGenericError(xmlGenericErrorContext,
5557 "%s(%d): ", ctxt->input->filename,
5558 ctxt->input->line);
5559 xmlGenericError(xmlGenericErrorContext,
5560 "Leaving INCLUDE Conditional Section\n");
5561 }
5562
Daniel Veillarda07050d2003-10-19 14:46:32 +00005563 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005564 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005565 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005566 int depth = 0;
5567
5568 SKIP(6);
5569 SKIP_BLANKS;
5570 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005571 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005572 } else {
5573 NEXT;
5574 }
5575 if (xmlParserDebugEntities) {
5576 if ((ctxt->input != NULL) && (ctxt->input->filename))
5577 xmlGenericError(xmlGenericErrorContext,
5578 "%s(%d): ", ctxt->input->filename,
5579 ctxt->input->line);
5580 xmlGenericError(xmlGenericErrorContext,
5581 "Entering IGNORE Conditional Section\n");
5582 }
5583
5584 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005585 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005586 * But disable SAX event generating DTD building in the meantime
5587 */
5588 state = ctxt->disableSAX;
5589 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005590 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005591 ctxt->instate = XML_PARSER_IGNORE;
5592
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005593 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005594 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5595 depth++;
5596 SKIP(3);
5597 continue;
5598 }
5599 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5600 if (--depth >= 0) SKIP(3);
5601 continue;
5602 }
5603 NEXT;
5604 continue;
5605 }
5606
5607 ctxt->disableSAX = state;
5608 ctxt->instate = instate;
5609
5610 if (xmlParserDebugEntities) {
5611 if ((ctxt->input != NULL) && (ctxt->input->filename))
5612 xmlGenericError(xmlGenericErrorContext,
5613 "%s(%d): ", ctxt->input->filename,
5614 ctxt->input->line);
5615 xmlGenericError(xmlGenericErrorContext,
5616 "Leaving IGNORE Conditional Section\n");
5617 }
5618
5619 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005620 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005621 }
5622
5623 if (RAW == 0)
5624 SHRINK;
5625
5626 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005627 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005628 } else {
5629 SKIP(3);
5630 }
5631}
5632
5633/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005634 * xmlParseMarkupDecl:
5635 * @ctxt: an XML parser context
5636 *
5637 * parse Markup declarations
5638 *
5639 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5640 * NotationDecl | PI | Comment
5641 *
5642 * [ VC: Proper Declaration/PE Nesting ]
5643 * Parameter-entity replacement text must be properly nested with
5644 * markup declarations. That is to say, if either the first character
5645 * or the last character of a markup declaration (markupdecl above) is
5646 * contained in the replacement text for a parameter-entity reference,
5647 * both must be contained in the same replacement text.
5648 *
5649 * [ WFC: PEs in Internal Subset ]
5650 * In the internal DTD subset, parameter-entity references can occur
5651 * only where markup declarations can occur, not within markup declarations.
5652 * (This does not apply to references that occur in external parameter
5653 * entities or to the external subset.)
5654 */
5655void
5656xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5657 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005658 if (CUR == '<') {
5659 if (NXT(1) == '!') {
5660 switch (NXT(2)) {
5661 case 'E':
5662 if (NXT(3) == 'L')
5663 xmlParseElementDecl(ctxt);
5664 else if (NXT(3) == 'N')
5665 xmlParseEntityDecl(ctxt);
5666 break;
5667 case 'A':
5668 xmlParseAttributeListDecl(ctxt);
5669 break;
5670 case 'N':
5671 xmlParseNotationDecl(ctxt);
5672 break;
5673 case '-':
5674 xmlParseComment(ctxt);
5675 break;
5676 default:
5677 /* there is an error but it will be detected later */
5678 break;
5679 }
5680 } else if (NXT(1) == '?') {
5681 xmlParsePI(ctxt);
5682 }
5683 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005684 /*
5685 * This is only for internal subset. On external entities,
5686 * the replacement is done before parsing stage
5687 */
5688 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5689 xmlParsePEReference(ctxt);
5690
5691 /*
5692 * Conditional sections are allowed from entities included
5693 * by PE References in the internal subset.
5694 */
5695 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5696 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5697 xmlParseConditionalSections(ctxt);
5698 }
5699 }
5700
5701 ctxt->instate = XML_PARSER_DTD;
5702}
5703
5704/**
5705 * xmlParseTextDecl:
5706 * @ctxt: an XML parser context
5707 *
5708 * parse an XML declaration header for external entities
5709 *
5710 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5711 *
5712 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5713 */
5714
5715void
5716xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5717 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005718 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005719
5720 /*
5721 * We know that '<?xml' is here.
5722 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005723 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005724 SKIP(5);
5725 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005726 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005727 return;
5728 }
5729
William M. Brack76e95df2003-10-18 16:20:14 +00005730 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005731 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5732 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005733 }
5734 SKIP_BLANKS;
5735
5736 /*
5737 * We may have the VersionInfo here.
5738 */
5739 version = xmlParseVersionInfo(ctxt);
5740 if (version == NULL)
5741 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005742 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005743 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005744 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5745 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005746 }
5747 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005748 ctxt->input->version = version;
5749
5750 /*
5751 * We must have the encoding declaration
5752 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005753 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005754 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5755 /*
5756 * The XML REC instructs us to stop parsing right here
5757 */
5758 return;
5759 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005760 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5761 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5762 "Missing encoding in text declaration\n");
5763 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005764
5765 SKIP_BLANKS;
5766 if ((RAW == '?') && (NXT(1) == '>')) {
5767 SKIP(2);
5768 } else if (RAW == '>') {
5769 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005770 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005771 NEXT;
5772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005774 MOVETO_ENDTAG(CUR_PTR);
5775 NEXT;
5776 }
5777}
5778
5779/**
Owen Taylor3473f882001-02-23 17:55:21 +00005780 * xmlParseExternalSubset:
5781 * @ctxt: an XML parser context
5782 * @ExternalID: the external identifier
5783 * @SystemID: the system identifier (or URL)
5784 *
5785 * parse Markup declarations from an external subset
5786 *
5787 * [30] extSubset ::= textDecl? extSubsetDecl
5788 *
5789 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5790 */
5791void
5792xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5793 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005794 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005795 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005796 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005797 xmlParseTextDecl(ctxt);
5798 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5799 /*
5800 * The XML REC instructs us to stop parsing right here
5801 */
5802 ctxt->instate = XML_PARSER_EOF;
5803 return;
5804 }
5805 }
5806 if (ctxt->myDoc == NULL) {
5807 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5808 }
5809 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5810 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5811
5812 ctxt->instate = XML_PARSER_DTD;
5813 ctxt->external = 1;
5814 while (((RAW == '<') && (NXT(1) == '?')) ||
5815 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005816 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005817 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005818 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005819
5820 GROW;
5821 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5822 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005823 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005824 NEXT;
5825 } else if (RAW == '%') {
5826 xmlParsePEReference(ctxt);
5827 } else
5828 xmlParseMarkupDecl(ctxt);
5829
5830 /*
5831 * Pop-up of finished entities.
5832 */
5833 while ((RAW == 0) && (ctxt->inputNr > 1))
5834 xmlPopInput(ctxt);
5835
Daniel Veillardfdc91562002-07-01 21:52:03 +00005836 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005837 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005838 break;
5839 }
5840 }
5841
5842 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005843 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005844 }
5845
5846}
5847
5848/**
5849 * xmlParseReference:
5850 * @ctxt: an XML parser context
5851 *
5852 * parse and handle entity references in content, depending on the SAX
5853 * interface, this may end-up in a call to character() if this is a
5854 * CharRef, a predefined entity, if there is no reference() callback.
5855 * or if the parser was asked to switch to that mode.
5856 *
5857 * [67] Reference ::= EntityRef | CharRef
5858 */
5859void
5860xmlParseReference(xmlParserCtxtPtr ctxt) {
5861 xmlEntityPtr ent;
5862 xmlChar *val;
5863 if (RAW != '&') return;
5864
5865 if (NXT(1) == '#') {
5866 int i = 0;
5867 xmlChar out[10];
5868 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005869 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005870
5871 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5872 /*
5873 * So we are using non-UTF-8 buffers
5874 * Check that the char fit on 8bits, if not
5875 * generate a CharRef.
5876 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005877 if (value <= 0xFF) {
5878 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005879 out[1] = 0;
5880 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5881 (!ctxt->disableSAX))
5882 ctxt->sax->characters(ctxt->userData, out, 1);
5883 } else {
5884 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005885 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005886 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005887 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005888 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5889 (!ctxt->disableSAX))
5890 ctxt->sax->reference(ctxt->userData, out);
5891 }
5892 } else {
5893 /*
5894 * Just encode the value in UTF-8
5895 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005896 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 out[i] = 0;
5898 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5899 (!ctxt->disableSAX))
5900 ctxt->sax->characters(ctxt->userData, out, i);
5901 }
5902 } else {
5903 ent = xmlParseEntityRef(ctxt);
5904 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005905 if (!ctxt->wellFormed)
5906 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 if ((ent->name != NULL) &&
5908 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5909 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005910 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005911
5912
5913 /*
5914 * The first reference to the entity trigger a parsing phase
5915 * where the ent->children is filled with the result from
5916 * the parsing.
5917 */
5918 if (ent->children == NULL) {
5919 xmlChar *value;
5920 value = ent->content;
5921
5922 /*
5923 * Check that this entity is well formed
5924 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005925 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005926 (value[1] == 0) && (value[0] == '<') &&
5927 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5928 /*
5929 * DONE: get definite answer on this !!!
5930 * Lots of entity decls are used to declare a single
5931 * char
5932 * <!ENTITY lt "<">
5933 * Which seems to be valid since
5934 * 2.4: The ampersand character (&) and the left angle
5935 * bracket (<) may appear in their literal form only
5936 * when used ... They are also legal within the literal
5937 * entity value of an internal entity declaration;i
5938 * see "4.3.2 Well-Formed Parsed Entities".
5939 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5940 * Looking at the OASIS test suite and James Clark
5941 * tests, this is broken. However the XML REC uses
5942 * it. Is the XML REC not well-formed ????
5943 * This is a hack to avoid this problem
5944 *
5945 * ANSWER: since lt gt amp .. are already defined,
5946 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005947 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005948 * is lousy but acceptable.
5949 */
5950 list = xmlNewDocText(ctxt->myDoc, value);
5951 if (list != NULL) {
5952 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5953 (ent->children == NULL)) {
5954 ent->children = list;
5955 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005956 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005957 list->parent = (xmlNodePtr) ent;
5958 } else {
5959 xmlFreeNodeList(list);
5960 }
5961 } else if (list != NULL) {
5962 xmlFreeNodeList(list);
5963 }
5964 } else {
5965 /*
5966 * 4.3.2: An internal general parsed entity is well-formed
5967 * if its replacement text matches the production labeled
5968 * content.
5969 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005970
5971 void *user_data;
5972 /*
5973 * This is a bit hackish but this seems the best
5974 * way to make sure both SAX and DOM entity support
5975 * behaves okay.
5976 */
5977 if (ctxt->userData == ctxt)
5978 user_data = NULL;
5979 else
5980 user_data = ctxt->userData;
5981
Owen Taylor3473f882001-02-23 17:55:21 +00005982 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5983 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005984 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5985 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005986 ctxt->depth--;
5987 } else if (ent->etype ==
5988 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5989 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005990 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005991 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005992 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005993 ctxt->depth--;
5994 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005995 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005996 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5997 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
5999 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006000 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006001 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006002 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006003 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6004 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006005 (ent->children == NULL)) {
6006 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006007 if (ctxt->replaceEntities) {
6008 /*
6009 * Prune it directly in the generated document
6010 * except for single text nodes.
6011 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006012 if (((list->type == XML_TEXT_NODE) &&
6013 (list->next == NULL)) ||
6014 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006015 list->parent = (xmlNodePtr) ent;
6016 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006017 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006018 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006019 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006020 while (list != NULL) {
6021 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006022 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006023 if (list->next == NULL)
6024 ent->last = list;
6025 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006026 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006027 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006028#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006029 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6030 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006031#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006032 }
6033 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006034 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006035 while (list != NULL) {
6036 list->parent = (xmlNodePtr) ent;
6037 if (list->next == NULL)
6038 ent->last = list;
6039 list = list->next;
6040 }
Owen Taylor3473f882001-02-23 17:55:21 +00006041 }
6042 } else {
6043 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006044 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006045 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006046 } else if ((ret != XML_ERR_OK) &&
6047 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006048 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006049 } else if (list != NULL) {
6050 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006051 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006052 }
6053 }
6054 }
6055 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6056 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6057 /*
6058 * Create a node.
6059 */
6060 ctxt->sax->reference(ctxt->userData, ent->name);
6061 return;
6062 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006063 /*
6064 * There is a problem on the handling of _private for entities
6065 * (bug 155816): Should we copy the content of the field from
6066 * the entity (possibly overwriting some value set by the user
6067 * when a copy is created), should we leave it alone, or should
6068 * we try to take care of different situations? The problem
6069 * is exacerbated by the usage of this field by the xmlReader.
6070 * To fix this bug, we look at _private on the created node
6071 * and, if it's NULL, we copy in whatever was in the entity.
6072 * If it's not NULL we leave it alone. This is somewhat of a
6073 * hack - maybe we should have further tests to determine
6074 * what to do.
6075 */
Owen Taylor3473f882001-02-23 17:55:21 +00006076 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6077 /*
6078 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006079 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006080 * In the first occurrence list contains the replacement.
6081 * progressive == 2 means we are operating on the Reader
6082 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006083 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006084 if (((list == NULL) && (ent->owner == 0)) ||
6085 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006086 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006087
6088 /*
6089 * when operating on a reader, the entities definitions
6090 * are always owning the entities subtree.
6091 if (ctxt->parseMode == XML_PARSE_READER)
6092 ent->owner = 1;
6093 */
6094
Daniel Veillard62f313b2001-07-04 19:49:14 +00006095 cur = ent->children;
6096 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006097 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006098 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006099 if (nw->_private == NULL)
6100 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006101 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006102 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006103 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006104 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006105 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006106 if (cur == ent->last) {
6107 /*
6108 * needed to detect some strange empty
6109 * node cases in the reader tests
6110 */
6111 if ((ctxt->parseMode == XML_PARSE_READER) &&
6112 (nw->type == XML_ELEMENT_NODE) &&
6113 (nw->children == NULL))
6114 nw->extra = 1;
6115
Daniel Veillard62f313b2001-07-04 19:49:14 +00006116 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006117 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006118 cur = cur->next;
6119 }
Daniel Veillard81273902003-09-30 00:43:48 +00006120#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006121 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006122 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006124 } else if (list == NULL) {
6125 xmlNodePtr nw = NULL, cur, next, last,
6126 firstChild = NULL;
6127 /*
6128 * Copy the entity child list and make it the new
6129 * entity child list. The goal is to make sure any
6130 * ID or REF referenced will be the one from the
6131 * document content and not the entity copy.
6132 */
6133 cur = ent->children;
6134 ent->children = NULL;
6135 last = ent->last;
6136 ent->last = NULL;
6137 while (cur != NULL) {
6138 next = cur->next;
6139 cur->next = NULL;
6140 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006141 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006142 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006143 if (nw->_private == NULL)
6144 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006145 if (firstChild == NULL){
6146 firstChild = cur;
6147 }
6148 xmlAddChild((xmlNodePtr) ent, nw);
6149 xmlAddChild(ctxt->node, cur);
6150 }
6151 if (cur == last)
6152 break;
6153 cur = next;
6154 }
6155 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006156#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006157 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6158 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006159#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006160 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006161 const xmlChar *nbktext;
6162
Daniel Veillard62f313b2001-07-04 19:49:14 +00006163 /*
6164 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006165 * node with a possible previous text one which
6166 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006167 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006168 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6169 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006170 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006171 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006172 if ((ent->last != ent->children) &&
6173 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006174 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006175 xmlAddChildList(ctxt->node, ent->children);
6176 }
6177
Owen Taylor3473f882001-02-23 17:55:21 +00006178 /*
6179 * This is to avoid a nasty side effect, see
6180 * characters() in SAX.c
6181 */
6182 ctxt->nodemem = 0;
6183 ctxt->nodelen = 0;
6184 return;
6185 } else {
6186 /*
6187 * Probably running in SAX mode
6188 */
6189 xmlParserInputPtr input;
6190
6191 input = xmlNewEntityInputStream(ctxt, ent);
6192 xmlPushInput(ctxt, input);
6193 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006194 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6195 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006196 xmlParseTextDecl(ctxt);
6197 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6198 /*
6199 * The XML REC instructs us to stop parsing right here
6200 */
6201 ctxt->instate = XML_PARSER_EOF;
6202 return;
6203 }
6204 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006205 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6206 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006207 }
6208 }
6209 return;
6210 }
6211 }
6212 } else {
6213 val = ent->content;
6214 if (val == NULL) return;
6215 /*
6216 * inline the entity.
6217 */
6218 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6219 (!ctxt->disableSAX))
6220 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6221 }
6222 }
6223}
6224
6225/**
6226 * xmlParseEntityRef:
6227 * @ctxt: an XML parser context
6228 *
6229 * parse ENTITY references declarations
6230 *
6231 * [68] EntityRef ::= '&' Name ';'
6232 *
6233 * [ WFC: Entity Declared ]
6234 * In a document without any DTD, a document with only an internal DTD
6235 * subset which contains no parameter entity references, or a document
6236 * with "standalone='yes'", the Name given in the entity reference
6237 * must match that in an entity declaration, except that well-formed
6238 * documents need not declare any of the following entities: amp, lt,
6239 * gt, apos, quot. The declaration of a parameter entity must precede
6240 * any reference to it. Similarly, the declaration of a general entity
6241 * must precede any reference to it which appears in a default value in an
6242 * attribute-list declaration. Note that if entities are declared in the
6243 * external subset or in external parameter entities, a non-validating
6244 * processor is not obligated to read and process their declarations;
6245 * for such documents, the rule that an entity must be declared is a
6246 * well-formedness constraint only if standalone='yes'.
6247 *
6248 * [ WFC: Parsed Entity ]
6249 * An entity reference must not contain the name of an unparsed entity
6250 *
6251 * Returns the xmlEntityPtr if found, or NULL otherwise.
6252 */
6253xmlEntityPtr
6254xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006255 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006256 xmlEntityPtr ent = NULL;
6257
6258 GROW;
6259
6260 if (RAW == '&') {
6261 NEXT;
6262 name = xmlParseName(ctxt);
6263 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006264 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6265 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006266 } else {
6267 if (RAW == ';') {
6268 NEXT;
6269 /*
6270 * Ask first SAX for entity resolution, otherwise try the
6271 * predefined set.
6272 */
6273 if (ctxt->sax != NULL) {
6274 if (ctxt->sax->getEntity != NULL)
6275 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006276 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006277 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006278 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6279 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006280 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006281 }
Owen Taylor3473f882001-02-23 17:55:21 +00006282 }
6283 /*
6284 * [ WFC: Entity Declared ]
6285 * In a document without any DTD, a document with only an
6286 * internal DTD subset which contains no parameter entity
6287 * references, or a document with "standalone='yes'", the
6288 * Name given in the entity reference must match that in an
6289 * entity declaration, except that well-formed documents
6290 * need not declare any of the following entities: amp, lt,
6291 * gt, apos, quot.
6292 * The declaration of a parameter entity must precede any
6293 * reference to it.
6294 * Similarly, the declaration of a general entity must
6295 * precede any reference to it which appears in a default
6296 * value in an attribute-list declaration. Note that if
6297 * entities are declared in the external subset or in
6298 * external parameter entities, a non-validating processor
6299 * is not obligated to read and process their declarations;
6300 * for such documents, the rule that an entity must be
6301 * declared is a well-formedness constraint only if
6302 * standalone='yes'.
6303 */
6304 if (ent == NULL) {
6305 if ((ctxt->standalone == 1) ||
6306 ((ctxt->hasExternalSubset == 0) &&
6307 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006308 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006309 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006311 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006312 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006313 if ((ctxt->inSubset == 0) &&
6314 (ctxt->sax != NULL) &&
6315 (ctxt->sax->reference != NULL)) {
6316 ctxt->sax->reference(ctxt, name);
6317 }
Owen Taylor3473f882001-02-23 17:55:21 +00006318 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006319 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006320 }
6321
6322 /*
6323 * [ WFC: Parsed Entity ]
6324 * An entity reference must not contain the name of an
6325 * unparsed entity
6326 */
6327 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006328 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006329 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 }
6331
6332 /*
6333 * [ WFC: No External Entity References ]
6334 * Attribute values cannot contain direct or indirect
6335 * entity references to external entities.
6336 */
6337 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6338 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006339 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6340 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006341 }
6342 /*
6343 * [ WFC: No < in Attribute Values ]
6344 * The replacement text of any entity referred to directly or
6345 * indirectly in an attribute value (other than "&lt;") must
6346 * not contain a <.
6347 */
6348 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6349 (ent != NULL) &&
6350 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6351 (ent->content != NULL) &&
6352 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006353 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006354 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006355 }
6356
6357 /*
6358 * Internal check, no parameter entities here ...
6359 */
6360 else {
6361 switch (ent->etype) {
6362 case XML_INTERNAL_PARAMETER_ENTITY:
6363 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006364 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6365 "Attempt to reference the parameter entity '%s'\n",
6366 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006367 break;
6368 default:
6369 break;
6370 }
6371 }
6372
6373 /*
6374 * [ WFC: No Recursion ]
6375 * A parsed entity must not contain a recursive reference
6376 * to itself, either directly or indirectly.
6377 * Done somewhere else
6378 */
6379
6380 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006381 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006382 }
Owen Taylor3473f882001-02-23 17:55:21 +00006383 }
6384 }
6385 return(ent);
6386}
6387
6388/**
6389 * xmlParseStringEntityRef:
6390 * @ctxt: an XML parser context
6391 * @str: a pointer to an index in the string
6392 *
6393 * parse ENTITY references declarations, but this version parses it from
6394 * a string value.
6395 *
6396 * [68] EntityRef ::= '&' Name ';'
6397 *
6398 * [ WFC: Entity Declared ]
6399 * In a document without any DTD, a document with only an internal DTD
6400 * subset which contains no parameter entity references, or a document
6401 * with "standalone='yes'", the Name given in the entity reference
6402 * must match that in an entity declaration, except that well-formed
6403 * documents need not declare any of the following entities: amp, lt,
6404 * gt, apos, quot. The declaration of a parameter entity must precede
6405 * any reference to it. Similarly, the declaration of a general entity
6406 * must precede any reference to it which appears in a default value in an
6407 * attribute-list declaration. Note that if entities are declared in the
6408 * external subset or in external parameter entities, a non-validating
6409 * processor is not obligated to read and process their declarations;
6410 * for such documents, the rule that an entity must be declared is a
6411 * well-formedness constraint only if standalone='yes'.
6412 *
6413 * [ WFC: Parsed Entity ]
6414 * An entity reference must not contain the name of an unparsed entity
6415 *
6416 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6417 * is updated to the current location in the string.
6418 */
6419xmlEntityPtr
6420xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6421 xmlChar *name;
6422 const xmlChar *ptr;
6423 xmlChar cur;
6424 xmlEntityPtr ent = NULL;
6425
6426 if ((str == NULL) || (*str == NULL))
6427 return(NULL);
6428 ptr = *str;
6429 cur = *ptr;
6430 if (cur == '&') {
6431 ptr++;
6432 cur = *ptr;
6433 name = xmlParseStringName(ctxt, &ptr);
6434 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006435 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6436 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006437 } else {
6438 if (*ptr == ';') {
6439 ptr++;
6440 /*
6441 * Ask first SAX for entity resolution, otherwise try the
6442 * predefined set.
6443 */
6444 if (ctxt->sax != NULL) {
6445 if (ctxt->sax->getEntity != NULL)
6446 ent = ctxt->sax->getEntity(ctxt->userData, name);
6447 if (ent == NULL)
6448 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006449 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006450 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006451 }
Owen Taylor3473f882001-02-23 17:55:21 +00006452 }
6453 /*
6454 * [ WFC: Entity Declared ]
6455 * In a document without any DTD, a document with only an
6456 * internal DTD subset which contains no parameter entity
6457 * references, or a document with "standalone='yes'", the
6458 * Name given in the entity reference must match that in an
6459 * entity declaration, except that well-formed documents
6460 * need not declare any of the following entities: amp, lt,
6461 * gt, apos, quot.
6462 * The declaration of a parameter entity must precede any
6463 * reference to it.
6464 * Similarly, the declaration of a general entity must
6465 * precede any reference to it which appears in a default
6466 * value in an attribute-list declaration. Note that if
6467 * entities are declared in the external subset or in
6468 * external parameter entities, a non-validating processor
6469 * is not obligated to read and process their declarations;
6470 * for such documents, the rule that an entity must be
6471 * declared is a well-formedness constraint only if
6472 * standalone='yes'.
6473 */
6474 if (ent == NULL) {
6475 if ((ctxt->standalone == 1) ||
6476 ((ctxt->hasExternalSubset == 0) &&
6477 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006478 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006479 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006481 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006482 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006483 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006484 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006485 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006486 }
6487
6488 /*
6489 * [ WFC: Parsed Entity ]
6490 * An entity reference must not contain the name of an
6491 * unparsed entity
6492 */
6493 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006494 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006495 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006496 }
6497
6498 /*
6499 * [ WFC: No External Entity References ]
6500 * Attribute values cannot contain direct or indirect
6501 * entity references to external entities.
6502 */
6503 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6504 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006505 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006506 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006507 }
6508 /*
6509 * [ WFC: No < in Attribute Values ]
6510 * The replacement text of any entity referred to directly or
6511 * indirectly in an attribute value (other than "&lt;") must
6512 * not contain a <.
6513 */
6514 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6515 (ent != NULL) &&
6516 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6517 (ent->content != NULL) &&
6518 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006519 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6520 "'<' in entity '%s' is not allowed in attributes values\n",
6521 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006522 }
6523
6524 /*
6525 * Internal check, no parameter entities here ...
6526 */
6527 else {
6528 switch (ent->etype) {
6529 case XML_INTERNAL_PARAMETER_ENTITY:
6530 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006531 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6532 "Attempt to reference the parameter entity '%s'\n",
6533 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 break;
6535 default:
6536 break;
6537 }
6538 }
6539
6540 /*
6541 * [ WFC: No Recursion ]
6542 * A parsed entity must not contain a recursive reference
6543 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006544 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006545 */
6546
6547 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006548 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006549 }
6550 xmlFree(name);
6551 }
6552 }
6553 *str = ptr;
6554 return(ent);
6555}
6556
6557/**
6558 * xmlParsePEReference:
6559 * @ctxt: an XML parser context
6560 *
6561 * parse PEReference declarations
6562 * The entity content is handled directly by pushing it's content as
6563 * a new input stream.
6564 *
6565 * [69] PEReference ::= '%' Name ';'
6566 *
6567 * [ WFC: No Recursion ]
6568 * A parsed entity must not contain a recursive
6569 * reference to itself, either directly or indirectly.
6570 *
6571 * [ WFC: Entity Declared ]
6572 * In a document without any DTD, a document with only an internal DTD
6573 * subset which contains no parameter entity references, or a document
6574 * with "standalone='yes'", ... ... The declaration of a parameter
6575 * entity must precede any reference to it...
6576 *
6577 * [ VC: Entity Declared ]
6578 * In a document with an external subset or external parameter entities
6579 * with "standalone='no'", ... ... The declaration of a parameter entity
6580 * must precede any reference to it...
6581 *
6582 * [ WFC: In DTD ]
6583 * Parameter-entity references may only appear in the DTD.
6584 * NOTE: misleading but this is handled.
6585 */
6586void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006587xmlParsePEReference(xmlParserCtxtPtr ctxt)
6588{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006589 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006590 xmlEntityPtr entity = NULL;
6591 xmlParserInputPtr input;
6592
6593 if (RAW == '%') {
6594 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006595 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006596 if (name == NULL) {
6597 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6598 "xmlParsePEReference: no name\n");
6599 } else {
6600 if (RAW == ';') {
6601 NEXT;
6602 if ((ctxt->sax != NULL) &&
6603 (ctxt->sax->getParameterEntity != NULL))
6604 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6605 name);
6606 if (entity == NULL) {
6607 /*
6608 * [ WFC: Entity Declared ]
6609 * In a document without any DTD, a document with only an
6610 * internal DTD subset which contains no parameter entity
6611 * references, or a document with "standalone='yes'", ...
6612 * ... The declaration of a parameter entity must precede
6613 * any reference to it...
6614 */
6615 if ((ctxt->standalone == 1) ||
6616 ((ctxt->hasExternalSubset == 0) &&
6617 (ctxt->hasPErefs == 0))) {
6618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6619 "PEReference: %%%s; not found\n",
6620 name);
6621 } else {
6622 /*
6623 * [ VC: Entity Declared ]
6624 * In a document with an external subset or external
6625 * parameter entities with "standalone='no'", ...
6626 * ... The declaration of a parameter entity must
6627 * precede any reference to it...
6628 */
6629 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6630 "PEReference: %%%s; not found\n",
6631 name, NULL);
6632 ctxt->valid = 0;
6633 }
6634 } else {
6635 /*
6636 * Internal checking in case the entity quest barfed
6637 */
6638 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6639 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6640 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6641 "Internal: %%%s; is not a parameter entity\n",
6642 name, NULL);
6643 } else if (ctxt->input->free != deallocblankswrapper) {
6644 input =
6645 xmlNewBlanksWrapperInputStream(ctxt, entity);
6646 xmlPushInput(ctxt, input);
6647 } else {
6648 /*
6649 * TODO !!!
6650 * handle the extra spaces added before and after
6651 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6652 */
6653 input = xmlNewEntityInputStream(ctxt, entity);
6654 xmlPushInput(ctxt, input);
6655 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006656 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006657 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006658 xmlParseTextDecl(ctxt);
6659 if (ctxt->errNo ==
6660 XML_ERR_UNSUPPORTED_ENCODING) {
6661 /*
6662 * The XML REC instructs us to stop parsing
6663 * right here
6664 */
6665 ctxt->instate = XML_PARSER_EOF;
6666 return;
6667 }
6668 }
6669 }
6670 }
6671 ctxt->hasPErefs = 1;
6672 } else {
6673 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6674 }
6675 }
Owen Taylor3473f882001-02-23 17:55:21 +00006676 }
6677}
6678
6679/**
6680 * xmlParseStringPEReference:
6681 * @ctxt: an XML parser context
6682 * @str: a pointer to an index in the string
6683 *
6684 * parse PEReference declarations
6685 *
6686 * [69] PEReference ::= '%' Name ';'
6687 *
6688 * [ WFC: No Recursion ]
6689 * A parsed entity must not contain a recursive
6690 * reference to itself, either directly or indirectly.
6691 *
6692 * [ WFC: Entity Declared ]
6693 * In a document without any DTD, a document with only an internal DTD
6694 * subset which contains no parameter entity references, or a document
6695 * with "standalone='yes'", ... ... The declaration of a parameter
6696 * entity must precede any reference to it...
6697 *
6698 * [ VC: Entity Declared ]
6699 * In a document with an external subset or external parameter entities
6700 * with "standalone='no'", ... ... The declaration of a parameter entity
6701 * must precede any reference to it...
6702 *
6703 * [ WFC: In DTD ]
6704 * Parameter-entity references may only appear in the DTD.
6705 * NOTE: misleading but this is handled.
6706 *
6707 * Returns the string of the entity content.
6708 * str is updated to the current value of the index
6709 */
6710xmlEntityPtr
6711xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6712 const xmlChar *ptr;
6713 xmlChar cur;
6714 xmlChar *name;
6715 xmlEntityPtr entity = NULL;
6716
6717 if ((str == NULL) || (*str == NULL)) return(NULL);
6718 ptr = *str;
6719 cur = *ptr;
6720 if (cur == '%') {
6721 ptr++;
6722 cur = *ptr;
6723 name = xmlParseStringName(ctxt, &ptr);
6724 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006725 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6726 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006727 } else {
6728 cur = *ptr;
6729 if (cur == ';') {
6730 ptr++;
6731 cur = *ptr;
6732 if ((ctxt->sax != NULL) &&
6733 (ctxt->sax->getParameterEntity != NULL))
6734 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6735 name);
6736 if (entity == NULL) {
6737 /*
6738 * [ WFC: Entity Declared ]
6739 * In a document without any DTD, a document with only an
6740 * internal DTD subset which contains no parameter entity
6741 * references, or a document with "standalone='yes'", ...
6742 * ... The declaration of a parameter entity must precede
6743 * any reference to it...
6744 */
6745 if ((ctxt->standalone == 1) ||
6746 ((ctxt->hasExternalSubset == 0) &&
6747 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006748 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006749 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006750 } else {
6751 /*
6752 * [ VC: Entity Declared ]
6753 * In a document with an external subset or external
6754 * parameter entities with "standalone='no'", ...
6755 * ... The declaration of a parameter entity must
6756 * precede any reference to it...
6757 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006758 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6759 "PEReference: %%%s; not found\n",
6760 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006761 ctxt->valid = 0;
6762 }
6763 } else {
6764 /*
6765 * Internal checking in case the entity quest barfed
6766 */
6767 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6768 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006769 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6770 "%%%s; is not a parameter entity\n",
6771 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006772 }
6773 }
6774 ctxt->hasPErefs = 1;
6775 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006776 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006777 }
6778 xmlFree(name);
6779 }
6780 }
6781 *str = ptr;
6782 return(entity);
6783}
6784
6785/**
6786 * xmlParseDocTypeDecl:
6787 * @ctxt: an XML parser context
6788 *
6789 * parse a DOCTYPE declaration
6790 *
6791 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6792 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6793 *
6794 * [ VC: Root Element Type ]
6795 * The Name in the document type declaration must match the element
6796 * type of the root element.
6797 */
6798
6799void
6800xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006801 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006802 xmlChar *ExternalID = NULL;
6803 xmlChar *URI = NULL;
6804
6805 /*
6806 * We know that '<!DOCTYPE' has been detected.
6807 */
6808 SKIP(9);
6809
6810 SKIP_BLANKS;
6811
6812 /*
6813 * Parse the DOCTYPE name.
6814 */
6815 name = xmlParseName(ctxt);
6816 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006817 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6818 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006819 }
6820 ctxt->intSubName = name;
6821
6822 SKIP_BLANKS;
6823
6824 /*
6825 * Check for SystemID and ExternalID
6826 */
6827 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6828
6829 if ((URI != NULL) || (ExternalID != NULL)) {
6830 ctxt->hasExternalSubset = 1;
6831 }
6832 ctxt->extSubURI = URI;
6833 ctxt->extSubSystem = ExternalID;
6834
6835 SKIP_BLANKS;
6836
6837 /*
6838 * Create and update the internal subset.
6839 */
6840 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6841 (!ctxt->disableSAX))
6842 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6843
6844 /*
6845 * Is there any internal subset declarations ?
6846 * they are handled separately in xmlParseInternalSubset()
6847 */
6848 if (RAW == '[')
6849 return;
6850
6851 /*
6852 * We should be at the end of the DOCTYPE declaration.
6853 */
6854 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006855 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006856 }
6857 NEXT;
6858}
6859
6860/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006861 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006862 * @ctxt: an XML parser context
6863 *
6864 * parse the internal subset declaration
6865 *
6866 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6867 */
6868
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006869static void
Owen Taylor3473f882001-02-23 17:55:21 +00006870xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6871 /*
6872 * Is there any DTD definition ?
6873 */
6874 if (RAW == '[') {
6875 ctxt->instate = XML_PARSER_DTD;
6876 NEXT;
6877 /*
6878 * Parse the succession of Markup declarations and
6879 * PEReferences.
6880 * Subsequence (markupdecl | PEReference | S)*
6881 */
6882 while (RAW != ']') {
6883 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006884 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006885
6886 SKIP_BLANKS;
6887 xmlParseMarkupDecl(ctxt);
6888 xmlParsePEReference(ctxt);
6889
6890 /*
6891 * Pop-up of finished entities.
6892 */
6893 while ((RAW == 0) && (ctxt->inputNr > 1))
6894 xmlPopInput(ctxt);
6895
6896 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006897 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006898 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006899 break;
6900 }
6901 }
6902 if (RAW == ']') {
6903 NEXT;
6904 SKIP_BLANKS;
6905 }
6906 }
6907
6908 /*
6909 * We should be at the end of the DOCTYPE declaration.
6910 */
6911 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006912 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006913 }
6914 NEXT;
6915}
6916
Daniel Veillard81273902003-09-30 00:43:48 +00006917#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006918/**
6919 * xmlParseAttribute:
6920 * @ctxt: an XML parser context
6921 * @value: a xmlChar ** used to store the value of the attribute
6922 *
6923 * parse an attribute
6924 *
6925 * [41] Attribute ::= Name Eq AttValue
6926 *
6927 * [ WFC: No External Entity References ]
6928 * Attribute values cannot contain direct or indirect entity references
6929 * to external entities.
6930 *
6931 * [ WFC: No < in Attribute Values ]
6932 * The replacement text of any entity referred to directly or indirectly in
6933 * an attribute value (other than "&lt;") must not contain a <.
6934 *
6935 * [ VC: Attribute Value Type ]
6936 * The attribute must have been declared; the value must be of the type
6937 * declared for it.
6938 *
6939 * [25] Eq ::= S? '=' S?
6940 *
6941 * With namespace:
6942 *
6943 * [NS 11] Attribute ::= QName Eq AttValue
6944 *
6945 * Also the case QName == xmlns:??? is handled independently as a namespace
6946 * definition.
6947 *
6948 * Returns the attribute name, and the value in *value.
6949 */
6950
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006951const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006952xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006953 const xmlChar *name;
6954 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006955
6956 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006957 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006958 name = xmlParseName(ctxt);
6959 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006960 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006961 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006962 return(NULL);
6963 }
6964
6965 /*
6966 * read the value
6967 */
6968 SKIP_BLANKS;
6969 if (RAW == '=') {
6970 NEXT;
6971 SKIP_BLANKS;
6972 val = xmlParseAttValue(ctxt);
6973 ctxt->instate = XML_PARSER_CONTENT;
6974 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006975 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006976 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006977 return(NULL);
6978 }
6979
6980 /*
6981 * Check that xml:lang conforms to the specification
6982 * No more registered as an error, just generate a warning now
6983 * since this was deprecated in XML second edition
6984 */
6985 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6986 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006987 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6988 "Malformed value for xml:lang : %s\n",
6989 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006990 }
6991 }
6992
6993 /*
6994 * Check that xml:space conforms to the specification
6995 */
6996 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6997 if (xmlStrEqual(val, BAD_CAST "default"))
6998 *(ctxt->space) = 0;
6999 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7000 *(ctxt->space) = 1;
7001 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007002 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007003"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007004 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007005 }
7006 }
7007
7008 *value = val;
7009 return(name);
7010}
7011
7012/**
7013 * xmlParseStartTag:
7014 * @ctxt: an XML parser context
7015 *
7016 * parse a start of tag either for rule element or
7017 * EmptyElement. In both case we don't parse the tag closing chars.
7018 *
7019 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7020 *
7021 * [ WFC: Unique Att Spec ]
7022 * No attribute name may appear more than once in the same start-tag or
7023 * empty-element tag.
7024 *
7025 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7026 *
7027 * [ WFC: Unique Att Spec ]
7028 * No attribute name may appear more than once in the same start-tag or
7029 * empty-element tag.
7030 *
7031 * With namespace:
7032 *
7033 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7034 *
7035 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7036 *
7037 * Returns the element name parsed
7038 */
7039
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007040const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007041xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007042 const xmlChar *name;
7043 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007044 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007045 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007046 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007047 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007048 int i;
7049
7050 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007051 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007052
7053 name = xmlParseName(ctxt);
7054 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007055 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007056 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007057 return(NULL);
7058 }
7059
7060 /*
7061 * Now parse the attributes, it ends up with the ending
7062 *
7063 * (S Attribute)* S?
7064 */
7065 SKIP_BLANKS;
7066 GROW;
7067
Daniel Veillard21a0f912001-02-25 19:54:14 +00007068 while ((RAW != '>') &&
7069 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007070 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007071 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007072 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007073
7074 attname = xmlParseAttribute(ctxt, &attvalue);
7075 if ((attname != NULL) && (attvalue != NULL)) {
7076 /*
7077 * [ WFC: Unique Att Spec ]
7078 * No attribute name may appear more than once in the same
7079 * start-tag or empty-element tag.
7080 */
7081 for (i = 0; i < nbatts;i += 2) {
7082 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007083 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007084 xmlFree(attvalue);
7085 goto failed;
7086 }
7087 }
Owen Taylor3473f882001-02-23 17:55:21 +00007088 /*
7089 * Add the pair to atts
7090 */
7091 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007092 maxatts = 22; /* allow for 10 attrs by default */
7093 atts = (const xmlChar **)
7094 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007095 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007096 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007097 if (attvalue != NULL)
7098 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007099 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007100 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007101 ctxt->atts = atts;
7102 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007103 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007104 const xmlChar **n;
7105
Owen Taylor3473f882001-02-23 17:55:21 +00007106 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007107 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007108 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007109 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007110 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007111 if (attvalue != NULL)
7112 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007113 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007114 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007115 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007116 ctxt->atts = atts;
7117 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007118 }
7119 atts[nbatts++] = attname;
7120 atts[nbatts++] = attvalue;
7121 atts[nbatts] = NULL;
7122 atts[nbatts + 1] = NULL;
7123 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007124 if (attvalue != NULL)
7125 xmlFree(attvalue);
7126 }
7127
7128failed:
7129
Daniel Veillard3772de32002-12-17 10:31:45 +00007130 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007131 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7132 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007133 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007134 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7135 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007136 }
7137 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007138 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7139 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007140 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7141 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007142 break;
7143 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007144 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007145 GROW;
7146 }
7147
7148 /*
7149 * SAX: Start of Element !
7150 */
7151 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007152 (!ctxt->disableSAX)) {
7153 if (nbatts > 0)
7154 ctxt->sax->startElement(ctxt->userData, name, atts);
7155 else
7156 ctxt->sax->startElement(ctxt->userData, name, NULL);
7157 }
Owen Taylor3473f882001-02-23 17:55:21 +00007158
7159 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007160 /* Free only the content strings */
7161 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007162 if (atts[i] != NULL)
7163 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007164 }
7165 return(name);
7166}
7167
7168/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007169 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007170 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007171 * @line: line of the start tag
7172 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007173 *
7174 * parse an end of tag
7175 *
7176 * [42] ETag ::= '</' Name S? '>'
7177 *
7178 * With namespace
7179 *
7180 * [NS 9] ETag ::= '</' QName S? '>'
7181 */
7182
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007183static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007184xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007185 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007186
7187 GROW;
7188 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007189 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007190 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007191 return;
7192 }
7193 SKIP(2);
7194
Daniel Veillard46de64e2002-05-29 08:21:33 +00007195 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007196
7197 /*
7198 * We should definitely be at the ending "S? '>'" part
7199 */
7200 GROW;
7201 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007202 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007203 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007204 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007205 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007206
7207 /*
7208 * [ WFC: Element Type Match ]
7209 * The Name in an element's end-tag must match the element type in the
7210 * start-tag.
7211 *
7212 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007213 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007214 if (name == NULL) name = BAD_CAST "unparseable";
7215 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007216 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007217 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007218 }
7219
7220 /*
7221 * SAX: End of Tag
7222 */
7223 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7224 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007225 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007226
Daniel Veillarde57ec792003-09-10 10:50:59 +00007227 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007228 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007229 return;
7230}
7231
7232/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007233 * xmlParseEndTag:
7234 * @ctxt: an XML parser context
7235 *
7236 * parse an end of tag
7237 *
7238 * [42] ETag ::= '</' Name S? '>'
7239 *
7240 * With namespace
7241 *
7242 * [NS 9] ETag ::= '</' QName S? '>'
7243 */
7244
7245void
7246xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007247 xmlParseEndTag1(ctxt, 0);
7248}
Daniel Veillard81273902003-09-30 00:43:48 +00007249#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007250
7251/************************************************************************
7252 * *
7253 * SAX 2 specific operations *
7254 * *
7255 ************************************************************************/
7256
7257static const xmlChar *
7258xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7259 int len = 0, l;
7260 int c;
7261 int count = 0;
7262
7263 /*
7264 * Handler for more complex cases
7265 */
7266 GROW;
7267 c = CUR_CHAR(l);
7268 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007269 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007270 return(NULL);
7271 }
7272
7273 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007274 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007275 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007276 (IS_COMBINING(c)) ||
7277 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007278 if (count++ > 100) {
7279 count = 0;
7280 GROW;
7281 }
7282 len += l;
7283 NEXTL(l);
7284 c = CUR_CHAR(l);
7285 }
7286 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7287}
7288
7289/*
7290 * xmlGetNamespace:
7291 * @ctxt: an XML parser context
7292 * @prefix: the prefix to lookup
7293 *
7294 * Lookup the namespace name for the @prefix (which ca be NULL)
7295 * The prefix must come from the @ctxt->dict dictionnary
7296 *
7297 * Returns the namespace name or NULL if not bound
7298 */
7299static const xmlChar *
7300xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7301 int i;
7302
Daniel Veillarde57ec792003-09-10 10:50:59 +00007303 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007304 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007305 if (ctxt->nsTab[i] == prefix) {
7306 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7307 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007308 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007309 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007310 return(NULL);
7311}
7312
7313/**
7314 * xmlParseNCName:
7315 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007316 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007317 *
7318 * parse an XML name.
7319 *
7320 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7321 * CombiningChar | Extender
7322 *
7323 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7324 *
7325 * Returns the Name parsed or NULL
7326 */
7327
7328static const xmlChar *
7329xmlParseNCName(xmlParserCtxtPtr ctxt) {
7330 const xmlChar *in;
7331 const xmlChar *ret;
7332 int count = 0;
7333
7334 /*
7335 * Accelerator for simple ASCII names
7336 */
7337 in = ctxt->input->cur;
7338 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7339 ((*in >= 0x41) && (*in <= 0x5A)) ||
7340 (*in == '_')) {
7341 in++;
7342 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7343 ((*in >= 0x41) && (*in <= 0x5A)) ||
7344 ((*in >= 0x30) && (*in <= 0x39)) ||
7345 (*in == '_') || (*in == '-') ||
7346 (*in == '.'))
7347 in++;
7348 if ((*in > 0) && (*in < 0x80)) {
7349 count = in - ctxt->input->cur;
7350 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7351 ctxt->input->cur = in;
7352 ctxt->nbChars += count;
7353 ctxt->input->col += count;
7354 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007355 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007356 }
7357 return(ret);
7358 }
7359 }
7360 return(xmlParseNCNameComplex(ctxt));
7361}
7362
7363/**
7364 * xmlParseQName:
7365 * @ctxt: an XML parser context
7366 * @prefix: pointer to store the prefix part
7367 *
7368 * parse an XML Namespace QName
7369 *
7370 * [6] QName ::= (Prefix ':')? LocalPart
7371 * [7] Prefix ::= NCName
7372 * [8] LocalPart ::= NCName
7373 *
7374 * Returns the Name parsed or NULL
7375 */
7376
7377static const xmlChar *
7378xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7379 const xmlChar *l, *p;
7380
7381 GROW;
7382
7383 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007384 if (l == NULL) {
7385 if (CUR == ':') {
7386 l = xmlParseName(ctxt);
7387 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007388 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7389 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007390 *prefix = NULL;
7391 return(l);
7392 }
7393 }
7394 return(NULL);
7395 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007396 if (CUR == ':') {
7397 NEXT;
7398 p = l;
7399 l = xmlParseNCName(ctxt);
7400 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007401 xmlChar *tmp;
7402
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007403 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7404 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007405 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7406 p = xmlDictLookup(ctxt->dict, tmp, -1);
7407 if (tmp != NULL) xmlFree(tmp);
7408 *prefix = NULL;
7409 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007410 }
7411 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007412 xmlChar *tmp;
7413
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007414 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7415 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007416 NEXT;
7417 tmp = (xmlChar *) xmlParseName(ctxt);
7418 if (tmp != NULL) {
7419 tmp = xmlBuildQName(tmp, l, NULL, 0);
7420 l = xmlDictLookup(ctxt->dict, tmp, -1);
7421 if (tmp != NULL) xmlFree(tmp);
7422 *prefix = p;
7423 return(l);
7424 }
7425 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7426 l = xmlDictLookup(ctxt->dict, tmp, -1);
7427 if (tmp != NULL) xmlFree(tmp);
7428 *prefix = p;
7429 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007430 }
7431 *prefix = p;
7432 } else
7433 *prefix = NULL;
7434 return(l);
7435}
7436
7437/**
7438 * xmlParseQNameAndCompare:
7439 * @ctxt: an XML parser context
7440 * @name: the localname
7441 * @prefix: the prefix, if any.
7442 *
7443 * parse an XML name and compares for match
7444 * (specialized for endtag parsing)
7445 *
7446 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7447 * and the name for mismatch
7448 */
7449
7450static const xmlChar *
7451xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7452 xmlChar const *prefix) {
7453 const xmlChar *cmp = name;
7454 const xmlChar *in;
7455 const xmlChar *ret;
7456 const xmlChar *prefix2;
7457
7458 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7459
7460 GROW;
7461 in = ctxt->input->cur;
7462
7463 cmp = prefix;
7464 while (*in != 0 && *in == *cmp) {
7465 ++in;
7466 ++cmp;
7467 }
7468 if ((*cmp == 0) && (*in == ':')) {
7469 in++;
7470 cmp = name;
7471 while (*in != 0 && *in == *cmp) {
7472 ++in;
7473 ++cmp;
7474 }
William M. Brack76e95df2003-10-18 16:20:14 +00007475 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476 /* success */
7477 ctxt->input->cur = in;
7478 return((const xmlChar*) 1);
7479 }
7480 }
7481 /*
7482 * all strings coms from the dictionary, equality can be done directly
7483 */
7484 ret = xmlParseQName (ctxt, &prefix2);
7485 if ((ret == name) && (prefix == prefix2))
7486 return((const xmlChar*) 1);
7487 return ret;
7488}
7489
7490/**
7491 * xmlParseAttValueInternal:
7492 * @ctxt: an XML parser context
7493 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007494 * @alloc: whether the attribute was reallocated as a new string
7495 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007496 *
7497 * parse a value for an attribute.
7498 * NOTE: if no normalization is needed, the routine will return pointers
7499 * directly from the data buffer.
7500 *
7501 * 3.3.3 Attribute-Value Normalization:
7502 * Before the value of an attribute is passed to the application or
7503 * checked for validity, the XML processor must normalize it as follows:
7504 * - a character reference is processed by appending the referenced
7505 * character to the attribute value
7506 * - an entity reference is processed by recursively processing the
7507 * replacement text of the entity
7508 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7509 * appending #x20 to the normalized value, except that only a single
7510 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7511 * parsed entity or the literal entity value of an internal parsed entity
7512 * - other characters are processed by appending them to the normalized value
7513 * If the declared value is not CDATA, then the XML processor must further
7514 * process the normalized attribute value by discarding any leading and
7515 * trailing space (#x20) characters, and by replacing sequences of space
7516 * (#x20) characters by a single space (#x20) character.
7517 * All attributes for which no declaration has been read should be treated
7518 * by a non-validating parser as if declared CDATA.
7519 *
7520 * Returns the AttValue parsed or NULL. The value has to be freed by the
7521 * caller if it was copied, this can be detected by val[*len] == 0.
7522 */
7523
7524static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007525xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7526 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007527{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007528 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007529 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007530 xmlChar *ret = NULL;
7531
7532 GROW;
7533 in = (xmlChar *) CUR_PTR;
7534 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007535 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007536 return (NULL);
7537 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007538 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007539
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007540 /*
7541 * try to handle in this routine the most common case where no
7542 * allocation of a new string is required and where content is
7543 * pure ASCII.
7544 */
7545 limit = *in++;
7546 end = ctxt->input->end;
7547 start = in;
7548 if (in >= end) {
7549 const xmlChar *oldbase = ctxt->input->base;
7550 GROW;
7551 if (oldbase != ctxt->input->base) {
7552 long delta = ctxt->input->base - oldbase;
7553 start = start + delta;
7554 in = in + delta;
7555 }
7556 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007558 if (normalize) {
7559 /*
7560 * Skip any leading spaces
7561 */
7562 while ((in < end) && (*in != limit) &&
7563 ((*in == 0x20) || (*in == 0x9) ||
7564 (*in == 0xA) || (*in == 0xD))) {
7565 in++;
7566 start = in;
7567 if (in >= end) {
7568 const xmlChar *oldbase = ctxt->input->base;
7569 GROW;
7570 if (oldbase != ctxt->input->base) {
7571 long delta = ctxt->input->base - oldbase;
7572 start = start + delta;
7573 in = in + delta;
7574 }
7575 end = ctxt->input->end;
7576 }
7577 }
7578 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7579 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7580 if ((*in++ == 0x20) && (*in == 0x20)) break;
7581 if (in >= end) {
7582 const xmlChar *oldbase = ctxt->input->base;
7583 GROW;
7584 if (oldbase != ctxt->input->base) {
7585 long delta = ctxt->input->base - oldbase;
7586 start = start + delta;
7587 in = in + delta;
7588 }
7589 end = ctxt->input->end;
7590 }
7591 }
7592 last = in;
7593 /*
7594 * skip the trailing blanks
7595 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007596 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007597 while ((in < end) && (*in != limit) &&
7598 ((*in == 0x20) || (*in == 0x9) ||
7599 (*in == 0xA) || (*in == 0xD))) {
7600 in++;
7601 if (in >= end) {
7602 const xmlChar *oldbase = ctxt->input->base;
7603 GROW;
7604 if (oldbase != ctxt->input->base) {
7605 long delta = ctxt->input->base - oldbase;
7606 start = start + delta;
7607 in = in + delta;
7608 last = last + delta;
7609 }
7610 end = ctxt->input->end;
7611 }
7612 }
7613 if (*in != limit) goto need_complex;
7614 } else {
7615 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7616 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7617 in++;
7618 if (in >= end) {
7619 const xmlChar *oldbase = ctxt->input->base;
7620 GROW;
7621 if (oldbase != ctxt->input->base) {
7622 long delta = ctxt->input->base - oldbase;
7623 start = start + delta;
7624 in = in + delta;
7625 }
7626 end = ctxt->input->end;
7627 }
7628 }
7629 last = in;
7630 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007631 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007632 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007633 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007634 *len = last - start;
7635 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007636 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007637 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007638 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007639 }
7640 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007641 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007642 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007643need_complex:
7644 if (alloc) *alloc = 1;
7645 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007646}
7647
7648/**
7649 * xmlParseAttribute2:
7650 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007651 * @pref: the element prefix
7652 * @elem: the element name
7653 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007654 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007655 * @len: an int * to save the length of the attribute
7656 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007657 *
7658 * parse an attribute in the new SAX2 framework.
7659 *
7660 * Returns the attribute name, and the value in *value, .
7661 */
7662
7663static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007664xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7665 const xmlChar *pref, const xmlChar *elem,
7666 const xmlChar **prefix, xmlChar **value,
7667 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007668 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007669 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007670 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007671
7672 *value = NULL;
7673 GROW;
7674 name = xmlParseQName(ctxt, prefix);
7675 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007676 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7677 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007678 return(NULL);
7679 }
7680
7681 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007682 * get the type if needed
7683 */
7684 if (ctxt->attsSpecial != NULL) {
7685 int type;
7686
7687 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7688 pref, elem, *prefix, name);
7689 if (type != 0) normalize = 1;
7690 }
7691
7692 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007693 * read the value
7694 */
7695 SKIP_BLANKS;
7696 if (RAW == '=') {
7697 NEXT;
7698 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007699 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007700 ctxt->instate = XML_PARSER_CONTENT;
7701 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007702 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007703 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704 return(NULL);
7705 }
7706
Daniel Veillardd8925572005-06-08 22:34:55 +00007707 if (*prefix == ctxt->str_xml) {
7708 /*
7709 * Check that xml:lang conforms to the specification
7710 * No more registered as an error, just generate a warning now
7711 * since this was deprecated in XML second edition
7712 */
7713 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7714 internal_val = xmlStrndup(val, *len);
7715 if (!xmlCheckLanguageID(internal_val)) {
7716 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7717 "Malformed value for xml:lang : %s\n",
7718 internal_val, NULL);
7719 }
7720 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721
Daniel Veillardd8925572005-06-08 22:34:55 +00007722 /*
7723 * Check that xml:space conforms to the specification
7724 */
7725 if (xmlStrEqual(name, BAD_CAST "space")) {
7726 internal_val = xmlStrndup(val, *len);
7727 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7728 *(ctxt->space) = 0;
7729 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7730 *(ctxt->space) = 1;
7731 else {
7732 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007733"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007734 internal_val, NULL);
7735 }
7736 }
7737 if (internal_val) {
7738 xmlFree(internal_val);
7739 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007740 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007741
7742 *value = val;
7743 return(name);
7744}
7745
7746/**
7747 * xmlParseStartTag2:
7748 * @ctxt: an XML parser context
7749 *
7750 * parse a start of tag either for rule element or
7751 * EmptyElement. In both case we don't parse the tag closing chars.
7752 * This routine is called when running SAX2 parsing
7753 *
7754 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7755 *
7756 * [ WFC: Unique Att Spec ]
7757 * No attribute name may appear more than once in the same start-tag or
7758 * empty-element tag.
7759 *
7760 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7761 *
7762 * [ WFC: Unique Att Spec ]
7763 * No attribute name may appear more than once in the same start-tag or
7764 * empty-element tag.
7765 *
7766 * With namespace:
7767 *
7768 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7769 *
7770 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7771 *
7772 * Returns the element name parsed
7773 */
7774
7775static const xmlChar *
7776xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007777 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007778 const xmlChar *localname;
7779 const xmlChar *prefix;
7780 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007781 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 const xmlChar *nsname;
7783 xmlChar *attvalue;
7784 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007786 int nratts, nbatts, nbdef;
7787 int i, j, nbNs, attval;
7788 const xmlChar *base;
7789 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007790 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007791
7792 if (RAW != '<') return(NULL);
7793 NEXT1;
7794
7795 /*
7796 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7797 * point since the attribute values may be stored as pointers to
7798 * the buffer and calling SHRINK would destroy them !
7799 * The Shrinking is only possible once the full set of attribute
7800 * callbacks have been done.
7801 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007802reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007803 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007804 base = ctxt->input->base;
7805 cur = ctxt->input->cur - ctxt->input->base;
7806 nbatts = 0;
7807 nratts = 0;
7808 nbdef = 0;
7809 nbNs = 0;
7810 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007811 /* Forget any namespaces added during an earlier parse of this element. */
7812 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007813
7814 localname = xmlParseQName(ctxt, &prefix);
7815 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007816 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7817 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007818 return(NULL);
7819 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007820 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821
7822 /*
7823 * Now parse the attributes, it ends up with the ending
7824 *
7825 * (S Attribute)* S?
7826 */
7827 SKIP_BLANKS;
7828 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007829 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007830
7831 while ((RAW != '>') &&
7832 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007833 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007834 const xmlChar *q = CUR_PTR;
7835 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007836 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007837
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007838 attname = xmlParseAttribute2(ctxt, prefix, localname,
7839 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007840 if ((attname != NULL) && (attvalue != NULL)) {
7841 if (len < 0) len = xmlStrlen(attvalue);
7842 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007843 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7844 xmlURIPtr uri;
7845
7846 if (*URL != 0) {
7847 uri = xmlParseURI((const char *) URL);
7848 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007849 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7850 "xmlns: %s not a valid URI\n",
7851 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007852 } else {
7853 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007854 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7855 "xmlns: URI %s is not absolute\n",
7856 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007857 }
7858 xmlFreeURI(uri);
7859 }
7860 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007861 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007862 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007863 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007864 for (j = 1;j <= nbNs;j++)
7865 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7866 break;
7867 if (j <= nbNs)
7868 xmlErrAttributeDup(ctxt, NULL, attname);
7869 else
7870 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007871 if (alloc != 0) xmlFree(attvalue);
7872 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007873 continue;
7874 }
7875 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007876 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7877 xmlURIPtr uri;
7878
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007879 if (attname == ctxt->str_xml) {
7880 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007881 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7882 "xml namespace prefix mapped to wrong URI\n",
7883 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007884 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007885 /*
7886 * Do not keep a namespace definition node
7887 */
7888 if (alloc != 0) xmlFree(attvalue);
7889 SKIP_BLANKS;
7890 continue;
7891 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007892 uri = xmlParseURI((const char *) URL);
7893 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007894 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7895 "xmlns:%s: '%s' is not a valid URI\n",
7896 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007897 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007898 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007899 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7900 "xmlns:%s: URI %s is not absolute\n",
7901 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007902 }
7903 xmlFreeURI(uri);
7904 }
7905
Daniel Veillard0fb18932003-09-07 09:14:37 +00007906 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007907 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007908 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007909 for (j = 1;j <= nbNs;j++)
7910 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7911 break;
7912 if (j <= nbNs)
7913 xmlErrAttributeDup(ctxt, aprefix, attname);
7914 else
7915 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007916 if (alloc != 0) xmlFree(attvalue);
7917 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007918 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007919 continue;
7920 }
7921
7922 /*
7923 * Add the pair to atts
7924 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007925 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7926 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007927 if (attvalue[len] == 0)
7928 xmlFree(attvalue);
7929 goto failed;
7930 }
7931 maxatts = ctxt->maxatts;
7932 atts = ctxt->atts;
7933 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007934 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007935 atts[nbatts++] = attname;
7936 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007937 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007938 atts[nbatts++] = attvalue;
7939 attvalue += len;
7940 atts[nbatts++] = attvalue;
7941 /*
7942 * tag if some deallocation is needed
7943 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007944 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007945 } else {
7946 if ((attvalue != NULL) && (attvalue[len] == 0))
7947 xmlFree(attvalue);
7948 }
7949
7950failed:
7951
7952 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007953 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7955 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007956 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007957 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7958 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007959 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007960 }
7961 SKIP_BLANKS;
7962 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7963 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007964 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007965 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 break;
7967 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007969 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 }
7971
Daniel Veillard0fb18932003-09-07 09:14:37 +00007972 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007973 * The attributes defaulting
7974 */
7975 if (ctxt->attsDefault != NULL) {
7976 xmlDefAttrsPtr defaults;
7977
7978 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7979 if (defaults != NULL) {
7980 for (i = 0;i < defaults->nbAttrs;i++) {
7981 attname = defaults->values[4 * i];
7982 aprefix = defaults->values[4 * i + 1];
7983
7984 /*
7985 * special work for namespaces defaulted defs
7986 */
7987 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7988 /*
7989 * check that it's not a defined namespace
7990 */
7991 for (j = 1;j <= nbNs;j++)
7992 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7993 break;
7994 if (j <= nbNs) continue;
7995
7996 nsname = xmlGetNamespace(ctxt, NULL);
7997 if (nsname != defaults->values[4 * i + 2]) {
7998 if (nsPush(ctxt, NULL,
7999 defaults->values[4 * i + 2]) > 0)
8000 nbNs++;
8001 }
8002 } else if (aprefix == ctxt->str_xmlns) {
8003 /*
8004 * check that it's not a defined namespace
8005 */
8006 for (j = 1;j <= nbNs;j++)
8007 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8008 break;
8009 if (j <= nbNs) continue;
8010
8011 nsname = xmlGetNamespace(ctxt, attname);
8012 if (nsname != defaults->values[2]) {
8013 if (nsPush(ctxt, attname,
8014 defaults->values[4 * i + 2]) > 0)
8015 nbNs++;
8016 }
8017 } else {
8018 /*
8019 * check that it's not a defined attribute
8020 */
8021 for (j = 0;j < nbatts;j+=5) {
8022 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8023 break;
8024 }
8025 if (j < nbatts) continue;
8026
8027 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8028 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008029 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008030 }
8031 maxatts = ctxt->maxatts;
8032 atts = ctxt->atts;
8033 }
8034 atts[nbatts++] = attname;
8035 atts[nbatts++] = aprefix;
8036 if (aprefix == NULL)
8037 atts[nbatts++] = NULL;
8038 else
8039 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8040 atts[nbatts++] = defaults->values[4 * i + 2];
8041 atts[nbatts++] = defaults->values[4 * i + 3];
8042 nbdef++;
8043 }
8044 }
8045 }
8046 }
8047
Daniel Veillarde70c8772003-11-25 07:21:18 +00008048 /*
8049 * The attributes checkings
8050 */
8051 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008052 /*
8053 * The default namespace does not apply to attribute names.
8054 */
8055 if (atts[i + 1] != NULL) {
8056 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8057 if (nsname == NULL) {
8058 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8059 "Namespace prefix %s for %s on %s is not defined\n",
8060 atts[i + 1], atts[i], localname);
8061 }
8062 atts[i + 2] = nsname;
8063 } else
8064 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008065 /*
8066 * [ WFC: Unique Att Spec ]
8067 * No attribute name may appear more than once in the same
8068 * start-tag or empty-element tag.
8069 * As extended by the Namespace in XML REC.
8070 */
8071 for (j = 0; j < i;j += 5) {
8072 if (atts[i] == atts[j]) {
8073 if (atts[i+1] == atts[j+1]) {
8074 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8075 break;
8076 }
8077 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8078 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8079 "Namespaced Attribute %s in '%s' redefined\n",
8080 atts[i], nsname, NULL);
8081 break;
8082 }
8083 }
8084 }
8085 }
8086
Daniel Veillarde57ec792003-09-10 10:50:59 +00008087 nsname = xmlGetNamespace(ctxt, prefix);
8088 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008089 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8090 "Namespace prefix %s on %s is not defined\n",
8091 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008092 }
8093 *pref = prefix;
8094 *URI = nsname;
8095
8096 /*
8097 * SAX: Start of Element !
8098 */
8099 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8100 (!ctxt->disableSAX)) {
8101 if (nbNs > 0)
8102 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8103 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8104 nbatts / 5, nbdef, atts);
8105 else
8106 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8107 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8108 }
8109
8110 /*
8111 * Free up attribute allocated strings if needed
8112 */
8113 if (attval != 0) {
8114 for (i = 3,j = 0; j < nratts;i += 5,j++)
8115 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8116 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008117 }
8118
8119 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008120
8121base_changed:
8122 /*
8123 * the attribute strings are valid iif the base didn't changed
8124 */
8125 if (attval != 0) {
8126 for (i = 3,j = 0; j < nratts;i += 5,j++)
8127 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8128 xmlFree((xmlChar *) atts[i]);
8129 }
8130 ctxt->input->cur = ctxt->input->base + cur;
8131 if (ctxt->wellFormed == 1) {
8132 goto reparse;
8133 }
8134 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008135}
8136
8137/**
8138 * xmlParseEndTag2:
8139 * @ctxt: an XML parser context
8140 * @line: line of the start tag
8141 * @nsNr: number of namespaces on the start tag
8142 *
8143 * parse an end of tag
8144 *
8145 * [42] ETag ::= '</' Name S? '>'
8146 *
8147 * With namespace
8148 *
8149 * [NS 9] ETag ::= '</' QName S? '>'
8150 */
8151
8152static void
8153xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008154 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008155 const xmlChar *name;
8156
8157 GROW;
8158 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008159 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008160 return;
8161 }
8162 SKIP(2);
8163
William M. Brack13dfa872004-09-18 04:52:08 +00008164 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008165 if (ctxt->input->cur[tlen] == '>') {
8166 ctxt->input->cur += tlen + 1;
8167 goto done;
8168 }
8169 ctxt->input->cur += tlen;
8170 name = (xmlChar*)1;
8171 } else {
8172 if (prefix == NULL)
8173 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8174 else
8175 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8176 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008177
8178 /*
8179 * We should definitely be at the ending "S? '>'" part
8180 */
8181 GROW;
8182 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008183 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008184 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008185 } else
8186 NEXT1;
8187
8188 /*
8189 * [ WFC: Element Type Match ]
8190 * The Name in an element's end-tag must match the element type in the
8191 * start-tag.
8192 *
8193 */
8194 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008195 if (name == NULL) name = BAD_CAST "unparseable";
8196 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008197 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008198 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008199 }
8200
8201 /*
8202 * SAX: End of Tag
8203 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008204done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8206 (!ctxt->disableSAX))
8207 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8208
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 spacePop(ctxt);
8210 if (nsNr != 0)
8211 nsPop(ctxt, nsNr);
8212 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008213}
8214
8215/**
Owen Taylor3473f882001-02-23 17:55:21 +00008216 * xmlParseCDSect:
8217 * @ctxt: an XML parser context
8218 *
8219 * Parse escaped pure raw content.
8220 *
8221 * [18] CDSect ::= CDStart CData CDEnd
8222 *
8223 * [19] CDStart ::= '<![CDATA['
8224 *
8225 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8226 *
8227 * [21] CDEnd ::= ']]>'
8228 */
8229void
8230xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8231 xmlChar *buf = NULL;
8232 int len = 0;
8233 int size = XML_PARSER_BUFFER_SIZE;
8234 int r, rl;
8235 int s, sl;
8236 int cur, l;
8237 int count = 0;
8238
Daniel Veillard8f597c32003-10-06 08:19:27 +00008239 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008240 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008241 SKIP(9);
8242 } else
8243 return;
8244
8245 ctxt->instate = XML_PARSER_CDATA_SECTION;
8246 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008247 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008248 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008249 ctxt->instate = XML_PARSER_CONTENT;
8250 return;
8251 }
8252 NEXTL(rl);
8253 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008254 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008255 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008256 ctxt->instate = XML_PARSER_CONTENT;
8257 return;
8258 }
8259 NEXTL(sl);
8260 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008264 return;
8265 }
William M. Brack871611b2003-10-18 04:53:14 +00008266 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008267 ((r != ']') || (s != ']') || (cur != '>'))) {
8268 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008269 xmlChar *tmp;
8270
Owen Taylor3473f882001-02-23 17:55:21 +00008271 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008272 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8273 if (tmp == NULL) {
8274 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008275 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008276 return;
8277 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008278 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008279 }
8280 COPY_BUF(rl,buf,len,r);
8281 r = s;
8282 rl = sl;
8283 s = cur;
8284 sl = l;
8285 count++;
8286 if (count > 50) {
8287 GROW;
8288 count = 0;
8289 }
8290 NEXTL(l);
8291 cur = CUR_CHAR(l);
8292 }
8293 buf[len] = 0;
8294 ctxt->instate = XML_PARSER_CONTENT;
8295 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008296 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008297 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008298 xmlFree(buf);
8299 return;
8300 }
8301 NEXTL(l);
8302
8303 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008304 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008305 */
8306 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8307 if (ctxt->sax->cdataBlock != NULL)
8308 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008309 else if (ctxt->sax->characters != NULL)
8310 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008311 }
8312 xmlFree(buf);
8313}
8314
8315/**
8316 * xmlParseContent:
8317 * @ctxt: an XML parser context
8318 *
8319 * Parse a content:
8320 *
8321 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8322 */
8323
8324void
8325xmlParseContent(xmlParserCtxtPtr ctxt) {
8326 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008327 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008328 ((RAW != '<') || (NXT(1) != '/'))) {
8329 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008330 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008331 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008332
8333 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008334 * First case : a Processing Instruction.
8335 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008336 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008337 xmlParsePI(ctxt);
8338 }
8339
8340 /*
8341 * Second case : a CDSection
8342 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008343 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008344 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008345 xmlParseCDSect(ctxt);
8346 }
8347
8348 /*
8349 * Third case : a comment
8350 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008351 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008352 (NXT(2) == '-') && (NXT(3) == '-')) {
8353 xmlParseComment(ctxt);
8354 ctxt->instate = XML_PARSER_CONTENT;
8355 }
8356
8357 /*
8358 * Fourth case : a sub-element.
8359 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008360 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008361 xmlParseElement(ctxt);
8362 }
8363
8364 /*
8365 * Fifth case : a reference. If if has not been resolved,
8366 * parsing returns it's Name, create the node
8367 */
8368
Daniel Veillard21a0f912001-02-25 19:54:14 +00008369 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008370 xmlParseReference(ctxt);
8371 }
8372
8373 /*
8374 * Last case, text. Note that References are handled directly.
8375 */
8376 else {
8377 xmlParseCharData(ctxt, 0);
8378 }
8379
8380 GROW;
8381 /*
8382 * Pop-up of finished entities.
8383 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008384 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008385 xmlPopInput(ctxt);
8386 SHRINK;
8387
Daniel Veillardfdc91562002-07-01 21:52:03 +00008388 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008389 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8390 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008391 ctxt->instate = XML_PARSER_EOF;
8392 break;
8393 }
8394 }
8395}
8396
8397/**
8398 * xmlParseElement:
8399 * @ctxt: an XML parser context
8400 *
8401 * parse an XML element, this is highly recursive
8402 *
8403 * [39] element ::= EmptyElemTag | STag content ETag
8404 *
8405 * [ WFC: Element Type Match ]
8406 * The Name in an element's end-tag must match the element type in the
8407 * start-tag.
8408 *
Owen Taylor3473f882001-02-23 17:55:21 +00008409 */
8410
8411void
8412xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008413 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008414 const xmlChar *prefix;
8415 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008416 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008417 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008418 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008419 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008420
8421 /* Capture start position */
8422 if (ctxt->record_info) {
8423 node_info.begin_pos = ctxt->input->consumed +
8424 (CUR_PTR - ctxt->input->base);
8425 node_info.begin_line = ctxt->input->line;
8426 }
8427
8428 if (ctxt->spaceNr == 0)
8429 spacePush(ctxt, -1);
8430 else
8431 spacePush(ctxt, *ctxt->space);
8432
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008433 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008434#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008435 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008436#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008437 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008438#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008439 else
8440 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008441#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008442 if (name == NULL) {
8443 spacePop(ctxt);
8444 return;
8445 }
8446 namePush(ctxt, name);
8447 ret = ctxt->node;
8448
Daniel Veillard4432df22003-09-28 18:58:27 +00008449#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008450 /*
8451 * [ VC: Root Element Type ]
8452 * The Name in the document type declaration must match the element
8453 * type of the root element.
8454 */
8455 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8456 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8457 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008458#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008459
8460 /*
8461 * Check for an Empty Element.
8462 */
8463 if ((RAW == '/') && (NXT(1) == '>')) {
8464 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008465 if (ctxt->sax2) {
8466 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8467 (!ctxt->disableSAX))
8468 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008469#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008470 } else {
8471 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8472 (!ctxt->disableSAX))
8473 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008474#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008475 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008476 namePop(ctxt);
8477 spacePop(ctxt);
8478 if (nsNr != ctxt->nsNr)
8479 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 if ( ret != NULL && ctxt->record_info ) {
8481 node_info.end_pos = ctxt->input->consumed +
8482 (CUR_PTR - ctxt->input->base);
8483 node_info.end_line = ctxt->input->line;
8484 node_info.node = ret;
8485 xmlParserAddNodeInfo(ctxt, &node_info);
8486 }
8487 return;
8488 }
8489 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008490 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008491 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008492 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8493 "Couldn't find end of Start Tag %s line %d\n",
8494 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008495
8496 /*
8497 * end of parsing of this node.
8498 */
8499 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008500 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008501 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008502 if (nsNr != ctxt->nsNr)
8503 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008504
8505 /*
8506 * Capture end position and add node
8507 */
8508 if ( ret != NULL && ctxt->record_info ) {
8509 node_info.end_pos = ctxt->input->consumed +
8510 (CUR_PTR - ctxt->input->base);
8511 node_info.end_line = ctxt->input->line;
8512 node_info.node = ret;
8513 xmlParserAddNodeInfo(ctxt, &node_info);
8514 }
8515 return;
8516 }
8517
8518 /*
8519 * Parse the content of the element:
8520 */
8521 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008522 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008523 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008524 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008525 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008526
8527 /*
8528 * end of parsing of this node.
8529 */
8530 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008531 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008532 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008533 if (nsNr != ctxt->nsNr)
8534 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008535 return;
8536 }
8537
8538 /*
8539 * parse the end of tag: '</' should be here.
8540 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008541 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008542 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008543 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008544 }
8545#ifdef LIBXML_SAX1_ENABLED
8546 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008547 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008548#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008549
8550 /*
8551 * Capture end position and add node
8552 */
8553 if ( ret != NULL && ctxt->record_info ) {
8554 node_info.end_pos = ctxt->input->consumed +
8555 (CUR_PTR - ctxt->input->base);
8556 node_info.end_line = ctxt->input->line;
8557 node_info.node = ret;
8558 xmlParserAddNodeInfo(ctxt, &node_info);
8559 }
8560}
8561
8562/**
8563 * xmlParseVersionNum:
8564 * @ctxt: an XML parser context
8565 *
8566 * parse the XML version value.
8567 *
8568 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8569 *
8570 * Returns the string giving the XML version number, or NULL
8571 */
8572xmlChar *
8573xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8574 xmlChar *buf = NULL;
8575 int len = 0;
8576 int size = 10;
8577 xmlChar cur;
8578
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008579 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008580 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008581 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008582 return(NULL);
8583 }
8584 cur = CUR;
8585 while (((cur >= 'a') && (cur <= 'z')) ||
8586 ((cur >= 'A') && (cur <= 'Z')) ||
8587 ((cur >= '0') && (cur <= '9')) ||
8588 (cur == '_') || (cur == '.') ||
8589 (cur == ':') || (cur == '-')) {
8590 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008591 xmlChar *tmp;
8592
Owen Taylor3473f882001-02-23 17:55:21 +00008593 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008594 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8595 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008596 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008597 return(NULL);
8598 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008599 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008600 }
8601 buf[len++] = cur;
8602 NEXT;
8603 cur=CUR;
8604 }
8605 buf[len] = 0;
8606 return(buf);
8607}
8608
8609/**
8610 * xmlParseVersionInfo:
8611 * @ctxt: an XML parser context
8612 *
8613 * parse the XML version.
8614 *
8615 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8616 *
8617 * [25] Eq ::= S? '=' S?
8618 *
8619 * Returns the version string, e.g. "1.0"
8620 */
8621
8622xmlChar *
8623xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8624 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008625
Daniel Veillarda07050d2003-10-19 14:46:32 +00008626 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008627 SKIP(7);
8628 SKIP_BLANKS;
8629 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008630 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008631 return(NULL);
8632 }
8633 NEXT;
8634 SKIP_BLANKS;
8635 if (RAW == '"') {
8636 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008637 version = xmlParseVersionNum(ctxt);
8638 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008639 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008640 } else
8641 NEXT;
8642 } else if (RAW == '\''){
8643 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008644 version = xmlParseVersionNum(ctxt);
8645 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008646 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008647 } else
8648 NEXT;
8649 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008650 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008651 }
8652 }
8653 return(version);
8654}
8655
8656/**
8657 * xmlParseEncName:
8658 * @ctxt: an XML parser context
8659 *
8660 * parse the XML encoding name
8661 *
8662 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8663 *
8664 * Returns the encoding name value or NULL
8665 */
8666xmlChar *
8667xmlParseEncName(xmlParserCtxtPtr ctxt) {
8668 xmlChar *buf = NULL;
8669 int len = 0;
8670 int size = 10;
8671 xmlChar cur;
8672
8673 cur = CUR;
8674 if (((cur >= 'a') && (cur <= 'z')) ||
8675 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008676 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008677 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008678 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008679 return(NULL);
8680 }
8681
8682 buf[len++] = cur;
8683 NEXT;
8684 cur = CUR;
8685 while (((cur >= 'a') && (cur <= 'z')) ||
8686 ((cur >= 'A') && (cur <= 'Z')) ||
8687 ((cur >= '0') && (cur <= '9')) ||
8688 (cur == '.') || (cur == '_') ||
8689 (cur == '-')) {
8690 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008691 xmlChar *tmp;
8692
Owen Taylor3473f882001-02-23 17:55:21 +00008693 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008694 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8695 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008696 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008697 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008698 return(NULL);
8699 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008700 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008701 }
8702 buf[len++] = cur;
8703 NEXT;
8704 cur = CUR;
8705 if (cur == 0) {
8706 SHRINK;
8707 GROW;
8708 cur = CUR;
8709 }
8710 }
8711 buf[len] = 0;
8712 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008713 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008714 }
8715 return(buf);
8716}
8717
8718/**
8719 * xmlParseEncodingDecl:
8720 * @ctxt: an XML parser context
8721 *
8722 * parse the XML encoding declaration
8723 *
8724 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8725 *
8726 * this setups the conversion filters.
8727 *
8728 * Returns the encoding value or NULL
8729 */
8730
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008731const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008732xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8733 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008734
8735 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008736 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008737 SKIP(8);
8738 SKIP_BLANKS;
8739 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008740 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008741 return(NULL);
8742 }
8743 NEXT;
8744 SKIP_BLANKS;
8745 if (RAW == '"') {
8746 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008747 encoding = xmlParseEncName(ctxt);
8748 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008749 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008750 } else
8751 NEXT;
8752 } else if (RAW == '\''){
8753 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008754 encoding = xmlParseEncName(ctxt);
8755 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008756 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008757 } else
8758 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008759 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008760 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008761 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008762 /*
8763 * UTF-16 encoding stwich has already taken place at this stage,
8764 * more over the little-endian/big-endian selection is already done
8765 */
8766 if ((encoding != NULL) &&
8767 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8768 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008769 if (ctxt->encoding != NULL)
8770 xmlFree((xmlChar *) ctxt->encoding);
8771 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008772 }
8773 /*
8774 * UTF-8 encoding is handled natively
8775 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008776 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008777 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8778 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008779 if (ctxt->encoding != NULL)
8780 xmlFree((xmlChar *) ctxt->encoding);
8781 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008782 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008783 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008784 xmlCharEncodingHandlerPtr handler;
8785
8786 if (ctxt->input->encoding != NULL)
8787 xmlFree((xmlChar *) ctxt->input->encoding);
8788 ctxt->input->encoding = encoding;
8789
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008790 handler = xmlFindCharEncodingHandler((const char *) encoding);
8791 if (handler != NULL) {
8792 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008793 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008794 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008795 "Unsupported encoding %s\n", encoding);
8796 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008797 }
8798 }
8799 }
8800 return(encoding);
8801}
8802
8803/**
8804 * xmlParseSDDecl:
8805 * @ctxt: an XML parser context
8806 *
8807 * parse the XML standalone declaration
8808 *
8809 * [32] SDDecl ::= S 'standalone' Eq
8810 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8811 *
8812 * [ VC: Standalone Document Declaration ]
8813 * TODO The standalone document declaration must have the value "no"
8814 * if any external markup declarations contain declarations of:
8815 * - attributes with default values, if elements to which these
8816 * attributes apply appear in the document without specifications
8817 * of values for these attributes, or
8818 * - entities (other than amp, lt, gt, apos, quot), if references
8819 * to those entities appear in the document, or
8820 * - attributes with values subject to normalization, where the
8821 * attribute appears in the document with a value which will change
8822 * as a result of normalization, or
8823 * - element types with element content, if white space occurs directly
8824 * within any instance of those types.
8825 *
8826 * Returns 1 if standalone, 0 otherwise
8827 */
8828
8829int
8830xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8831 int standalone = -1;
8832
8833 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008834 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008835 SKIP(10);
8836 SKIP_BLANKS;
8837 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008838 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008839 return(standalone);
8840 }
8841 NEXT;
8842 SKIP_BLANKS;
8843 if (RAW == '\''){
8844 NEXT;
8845 if ((RAW == 'n') && (NXT(1) == 'o')) {
8846 standalone = 0;
8847 SKIP(2);
8848 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8849 (NXT(2) == 's')) {
8850 standalone = 1;
8851 SKIP(3);
8852 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008853 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008854 }
8855 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008856 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008857 } else
8858 NEXT;
8859 } else if (RAW == '"'){
8860 NEXT;
8861 if ((RAW == 'n') && (NXT(1) == 'o')) {
8862 standalone = 0;
8863 SKIP(2);
8864 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8865 (NXT(2) == 's')) {
8866 standalone = 1;
8867 SKIP(3);
8868 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008869 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008870 }
8871 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008872 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008873 } else
8874 NEXT;
8875 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008876 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008877 }
8878 }
8879 return(standalone);
8880}
8881
8882/**
8883 * xmlParseXMLDecl:
8884 * @ctxt: an XML parser context
8885 *
8886 * parse an XML declaration header
8887 *
8888 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8889 */
8890
8891void
8892xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8893 xmlChar *version;
8894
8895 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008896 * This value for standalone indicates that the document has an
8897 * XML declaration but it does not have a standalone attribute.
8898 * It will be overwritten later if a standalone attribute is found.
8899 */
8900 ctxt->input->standalone = -2;
8901
8902 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008903 * We know that '<?xml' is here.
8904 */
8905 SKIP(5);
8906
William M. Brack76e95df2003-10-18 16:20:14 +00008907 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008908 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8909 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008910 }
8911 SKIP_BLANKS;
8912
8913 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008914 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008915 */
8916 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008917 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008918 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008919 } else {
8920 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8921 /*
8922 * TODO: Blueberry should be detected here
8923 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008924 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8925 "Unsupported version '%s'\n",
8926 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008927 }
8928 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008929 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008930 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008931 }
Owen Taylor3473f882001-02-23 17:55:21 +00008932
8933 /*
8934 * We may have the encoding declaration
8935 */
William M. Brack76e95df2003-10-18 16:20:14 +00008936 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008937 if ((RAW == '?') && (NXT(1) == '>')) {
8938 SKIP(2);
8939 return;
8940 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008942 }
8943 xmlParseEncodingDecl(ctxt);
8944 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8945 /*
8946 * The XML REC instructs us to stop parsing right here
8947 */
8948 return;
8949 }
8950
8951 /*
8952 * We may have the standalone status.
8953 */
William M. Brack76e95df2003-10-18 16:20:14 +00008954 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008955 if ((RAW == '?') && (NXT(1) == '>')) {
8956 SKIP(2);
8957 return;
8958 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008959 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008960 }
8961 SKIP_BLANKS;
8962 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8963
8964 SKIP_BLANKS;
8965 if ((RAW == '?') && (NXT(1) == '>')) {
8966 SKIP(2);
8967 } else if (RAW == '>') {
8968 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008969 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008970 NEXT;
8971 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008972 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008973 MOVETO_ENDTAG(CUR_PTR);
8974 NEXT;
8975 }
8976}
8977
8978/**
8979 * xmlParseMisc:
8980 * @ctxt: an XML parser context
8981 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008982 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008983 *
8984 * [27] Misc ::= Comment | PI | S
8985 */
8986
8987void
8988xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008989 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008990 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008991 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008992 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008993 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008994 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008995 NEXT;
8996 } else
8997 xmlParseComment(ctxt);
8998 }
8999}
9000
9001/**
9002 * xmlParseDocument:
9003 * @ctxt: an XML parser context
9004 *
9005 * parse an XML document (and build a tree if using the standard SAX
9006 * interface).
9007 *
9008 * [1] document ::= prolog element Misc*
9009 *
9010 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9011 *
9012 * Returns 0, -1 in case of error. the parser context is augmented
9013 * as a result of the parsing.
9014 */
9015
9016int
9017xmlParseDocument(xmlParserCtxtPtr ctxt) {
9018 xmlChar start[4];
9019 xmlCharEncoding enc;
9020
9021 xmlInitParser();
9022
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009023 if ((ctxt == NULL) || (ctxt->input == NULL))
9024 return(-1);
9025
Owen Taylor3473f882001-02-23 17:55:21 +00009026 GROW;
9027
9028 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009029 * SAX: detecting the level.
9030 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009031 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009032
9033 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009034 * SAX: beginning of the document processing.
9035 */
9036 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9037 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9038
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009039 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9040 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009041 /*
9042 * Get the 4 first bytes and decode the charset
9043 * if enc != XML_CHAR_ENCODING_NONE
9044 * plug some encoding conversion routines.
9045 */
9046 start[0] = RAW;
9047 start[1] = NXT(1);
9048 start[2] = NXT(2);
9049 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009050 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009051 if (enc != XML_CHAR_ENCODING_NONE) {
9052 xmlSwitchEncoding(ctxt, enc);
9053 }
Owen Taylor3473f882001-02-23 17:55:21 +00009054 }
9055
9056
9057 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009058 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009059 }
9060
9061 /*
9062 * Check for the XMLDecl in the Prolog.
9063 */
9064 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009065 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009066
9067 /*
9068 * Note that we will switch encoding on the fly.
9069 */
9070 xmlParseXMLDecl(ctxt);
9071 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9072 /*
9073 * The XML REC instructs us to stop parsing right here
9074 */
9075 return(-1);
9076 }
9077 ctxt->standalone = ctxt->input->standalone;
9078 SKIP_BLANKS;
9079 } else {
9080 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9081 }
9082 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9083 ctxt->sax->startDocument(ctxt->userData);
9084
9085 /*
9086 * The Misc part of the Prolog
9087 */
9088 GROW;
9089 xmlParseMisc(ctxt);
9090
9091 /*
9092 * Then possibly doc type declaration(s) and more Misc
9093 * (doctypedecl Misc*)?
9094 */
9095 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009096 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009097
9098 ctxt->inSubset = 1;
9099 xmlParseDocTypeDecl(ctxt);
9100 if (RAW == '[') {
9101 ctxt->instate = XML_PARSER_DTD;
9102 xmlParseInternalSubset(ctxt);
9103 }
9104
9105 /*
9106 * Create and update the external subset.
9107 */
9108 ctxt->inSubset = 2;
9109 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9110 (!ctxt->disableSAX))
9111 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9112 ctxt->extSubSystem, ctxt->extSubURI);
9113 ctxt->inSubset = 0;
9114
9115
9116 ctxt->instate = XML_PARSER_PROLOG;
9117 xmlParseMisc(ctxt);
9118 }
9119
9120 /*
9121 * Time to start parsing the tree itself
9122 */
9123 GROW;
9124 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009125 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9126 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009127 } else {
9128 ctxt->instate = XML_PARSER_CONTENT;
9129 xmlParseElement(ctxt);
9130 ctxt->instate = XML_PARSER_EPILOG;
9131
9132
9133 /*
9134 * The Misc part at the end
9135 */
9136 xmlParseMisc(ctxt);
9137
Daniel Veillard561b7f82002-03-20 21:55:57 +00009138 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009139 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009140 }
9141 ctxt->instate = XML_PARSER_EOF;
9142 }
9143
9144 /*
9145 * SAX: end of the document processing.
9146 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009147 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009148 ctxt->sax->endDocument(ctxt->userData);
9149
Daniel Veillard5997aca2002-03-18 18:36:20 +00009150 /*
9151 * Remove locally kept entity definitions if the tree was not built
9152 */
9153 if ((ctxt->myDoc != NULL) &&
9154 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9155 xmlFreeDoc(ctxt->myDoc);
9156 ctxt->myDoc = NULL;
9157 }
9158
Daniel Veillardc7612992002-02-17 22:47:37 +00009159 if (! ctxt->wellFormed) {
9160 ctxt->valid = 0;
9161 return(-1);
9162 }
Owen Taylor3473f882001-02-23 17:55:21 +00009163 return(0);
9164}
9165
9166/**
9167 * xmlParseExtParsedEnt:
9168 * @ctxt: an XML parser context
9169 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009170 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009171 * An external general parsed entity is well-formed if it matches the
9172 * production labeled extParsedEnt.
9173 *
9174 * [78] extParsedEnt ::= TextDecl? content
9175 *
9176 * Returns 0, -1 in case of error. the parser context is augmented
9177 * as a result of the parsing.
9178 */
9179
9180int
9181xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9182 xmlChar start[4];
9183 xmlCharEncoding enc;
9184
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009185 if ((ctxt == NULL) || (ctxt->input == NULL))
9186 return(-1);
9187
Owen Taylor3473f882001-02-23 17:55:21 +00009188 xmlDefaultSAXHandlerInit();
9189
Daniel Veillard309f81d2003-09-23 09:02:53 +00009190 xmlDetectSAX2(ctxt);
9191
Owen Taylor3473f882001-02-23 17:55:21 +00009192 GROW;
9193
9194 /*
9195 * SAX: beginning of the document processing.
9196 */
9197 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9198 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9199
9200 /*
9201 * Get the 4 first bytes and decode the charset
9202 * if enc != XML_CHAR_ENCODING_NONE
9203 * plug some encoding conversion routines.
9204 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009205 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9206 start[0] = RAW;
9207 start[1] = NXT(1);
9208 start[2] = NXT(2);
9209 start[3] = NXT(3);
9210 enc = xmlDetectCharEncoding(start, 4);
9211 if (enc != XML_CHAR_ENCODING_NONE) {
9212 xmlSwitchEncoding(ctxt, enc);
9213 }
Owen Taylor3473f882001-02-23 17:55:21 +00009214 }
9215
9216
9217 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009218 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009219 }
9220
9221 /*
9222 * Check for the XMLDecl in the Prolog.
9223 */
9224 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009225 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009226
9227 /*
9228 * Note that we will switch encoding on the fly.
9229 */
9230 xmlParseXMLDecl(ctxt);
9231 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9232 /*
9233 * The XML REC instructs us to stop parsing right here
9234 */
9235 return(-1);
9236 }
9237 SKIP_BLANKS;
9238 } else {
9239 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9240 }
9241 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9242 ctxt->sax->startDocument(ctxt->userData);
9243
9244 /*
9245 * Doing validity checking on chunk doesn't make sense
9246 */
9247 ctxt->instate = XML_PARSER_CONTENT;
9248 ctxt->validate = 0;
9249 ctxt->loadsubset = 0;
9250 ctxt->depth = 0;
9251
9252 xmlParseContent(ctxt);
9253
9254 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009255 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009256 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009257 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009258 }
9259
9260 /*
9261 * SAX: end of the document processing.
9262 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009263 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009264 ctxt->sax->endDocument(ctxt->userData);
9265
9266 if (! ctxt->wellFormed) return(-1);
9267 return(0);
9268}
9269
Daniel Veillard73b013f2003-09-30 12:36:01 +00009270#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009271/************************************************************************
9272 * *
9273 * Progressive parsing interfaces *
9274 * *
9275 ************************************************************************/
9276
9277/**
9278 * xmlParseLookupSequence:
9279 * @ctxt: an XML parser context
9280 * @first: the first char to lookup
9281 * @next: the next char to lookup or zero
9282 * @third: the next char to lookup or zero
9283 *
9284 * Try to find if a sequence (first, next, third) or just (first next) or
9285 * (first) is available in the input stream.
9286 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9287 * to avoid rescanning sequences of bytes, it DOES change the state of the
9288 * parser, do not use liberally.
9289 *
9290 * Returns the index to the current parsing point if the full sequence
9291 * is available, -1 otherwise.
9292 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009293static int
Owen Taylor3473f882001-02-23 17:55:21 +00009294xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9295 xmlChar next, xmlChar third) {
9296 int base, len;
9297 xmlParserInputPtr in;
9298 const xmlChar *buf;
9299
9300 in = ctxt->input;
9301 if (in == NULL) return(-1);
9302 base = in->cur - in->base;
9303 if (base < 0) return(-1);
9304 if (ctxt->checkIndex > base)
9305 base = ctxt->checkIndex;
9306 if (in->buf == NULL) {
9307 buf = in->base;
9308 len = in->length;
9309 } else {
9310 buf = in->buf->buffer->content;
9311 len = in->buf->buffer->use;
9312 }
9313 /* take into account the sequence length */
9314 if (third) len -= 2;
9315 else if (next) len --;
9316 for (;base < len;base++) {
9317 if (buf[base] == first) {
9318 if (third != 0) {
9319 if ((buf[base + 1] != next) ||
9320 (buf[base + 2] != third)) continue;
9321 } else if (next != 0) {
9322 if (buf[base + 1] != next) continue;
9323 }
9324 ctxt->checkIndex = 0;
9325#ifdef DEBUG_PUSH
9326 if (next == 0)
9327 xmlGenericError(xmlGenericErrorContext,
9328 "PP: lookup '%c' found at %d\n",
9329 first, base);
9330 else if (third == 0)
9331 xmlGenericError(xmlGenericErrorContext,
9332 "PP: lookup '%c%c' found at %d\n",
9333 first, next, base);
9334 else
9335 xmlGenericError(xmlGenericErrorContext,
9336 "PP: lookup '%c%c%c' found at %d\n",
9337 first, next, third, base);
9338#endif
9339 return(base - (in->cur - in->base));
9340 }
9341 }
9342 ctxt->checkIndex = base;
9343#ifdef DEBUG_PUSH
9344 if (next == 0)
9345 xmlGenericError(xmlGenericErrorContext,
9346 "PP: lookup '%c' failed\n", first);
9347 else if (third == 0)
9348 xmlGenericError(xmlGenericErrorContext,
9349 "PP: lookup '%c%c' failed\n", first, next);
9350 else
9351 xmlGenericError(xmlGenericErrorContext,
9352 "PP: lookup '%c%c%c' failed\n", first, next, third);
9353#endif
9354 return(-1);
9355}
9356
9357/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009358 * xmlParseGetLasts:
9359 * @ctxt: an XML parser context
9360 * @lastlt: pointer to store the last '<' from the input
9361 * @lastgt: pointer to store the last '>' from the input
9362 *
9363 * Lookup the last < and > in the current chunk
9364 */
9365static void
9366xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9367 const xmlChar **lastgt) {
9368 const xmlChar *tmp;
9369
9370 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9371 xmlGenericError(xmlGenericErrorContext,
9372 "Internal error: xmlParseGetLasts\n");
9373 return;
9374 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009375 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009376 tmp = ctxt->input->end;
9377 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009378 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009379 if (tmp < ctxt->input->base) {
9380 *lastlt = NULL;
9381 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009382 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009383 *lastlt = tmp;
9384 tmp++;
9385 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9386 if (*tmp == '\'') {
9387 tmp++;
9388 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9389 if (tmp < ctxt->input->end) tmp++;
9390 } else if (*tmp == '"') {
9391 tmp++;
9392 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9393 if (tmp < ctxt->input->end) tmp++;
9394 } else
9395 tmp++;
9396 }
9397 if (tmp < ctxt->input->end)
9398 *lastgt = tmp;
9399 else {
9400 tmp = *lastlt;
9401 tmp--;
9402 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9403 if (tmp >= ctxt->input->base)
9404 *lastgt = tmp;
9405 else
9406 *lastgt = NULL;
9407 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009408 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009409 } else {
9410 *lastlt = NULL;
9411 *lastgt = NULL;
9412 }
9413}
9414/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009415 * xmlCheckCdataPush:
9416 * @cur: pointer to the bock of characters
9417 * @len: length of the block in bytes
9418 *
9419 * Check that the block of characters is okay as SCdata content [20]
9420 *
9421 * Returns the number of bytes to pass if okay, a negative index where an
9422 * UTF-8 error occured otherwise
9423 */
9424static int
9425xmlCheckCdataPush(const xmlChar *utf, int len) {
9426 int ix;
9427 unsigned char c;
9428 int codepoint;
9429
9430 if ((utf == NULL) || (len <= 0))
9431 return(0);
9432
9433 for (ix = 0; ix < len;) { /* string is 0-terminated */
9434 c = utf[ix];
9435 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9436 if (c >= 0x20)
9437 ix++;
9438 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9439 ix++;
9440 else
9441 return(-ix);
9442 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9443 if (ix + 2 > len) return(ix);
9444 if ((utf[ix+1] & 0xc0 ) != 0x80)
9445 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009446 codepoint = (utf[ix] & 0x1f) << 6;
9447 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009448 if (!xmlIsCharQ(codepoint))
9449 return(-ix);
9450 ix += 2;
9451 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9452 if (ix + 3 > len) return(ix);
9453 if (((utf[ix+1] & 0xc0) != 0x80) ||
9454 ((utf[ix+2] & 0xc0) != 0x80))
9455 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009456 codepoint = (utf[ix] & 0xf) << 12;
9457 codepoint |= (utf[ix+1] & 0x3f) << 6;
9458 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009459 if (!xmlIsCharQ(codepoint))
9460 return(-ix);
9461 ix += 3;
9462 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9463 if (ix + 4 > len) return(ix);
9464 if (((utf[ix+1] & 0xc0) != 0x80) ||
9465 ((utf[ix+2] & 0xc0) != 0x80) ||
9466 ((utf[ix+3] & 0xc0) != 0x80))
9467 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009468 codepoint = (utf[ix] & 0x7) << 18;
9469 codepoint |= (utf[ix+1] & 0x3f) << 12;
9470 codepoint |= (utf[ix+2] & 0x3f) << 6;
9471 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009472 if (!xmlIsCharQ(codepoint))
9473 return(-ix);
9474 ix += 4;
9475 } else /* unknown encoding */
9476 return(-ix);
9477 }
9478 return(ix);
9479}
9480
9481/**
Owen Taylor3473f882001-02-23 17:55:21 +00009482 * xmlParseTryOrFinish:
9483 * @ctxt: an XML parser context
9484 * @terminate: last chunk indicator
9485 *
9486 * Try to progress on parsing
9487 *
9488 * Returns zero if no parsing was possible
9489 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009490static int
Owen Taylor3473f882001-02-23 17:55:21 +00009491xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9492 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009493 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009494 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009495 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009496
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009497 if (ctxt->input == NULL)
9498 return(0);
9499
Owen Taylor3473f882001-02-23 17:55:21 +00009500#ifdef DEBUG_PUSH
9501 switch (ctxt->instate) {
9502 case XML_PARSER_EOF:
9503 xmlGenericError(xmlGenericErrorContext,
9504 "PP: try EOF\n"); break;
9505 case XML_PARSER_START:
9506 xmlGenericError(xmlGenericErrorContext,
9507 "PP: try START\n"); break;
9508 case XML_PARSER_MISC:
9509 xmlGenericError(xmlGenericErrorContext,
9510 "PP: try MISC\n");break;
9511 case XML_PARSER_COMMENT:
9512 xmlGenericError(xmlGenericErrorContext,
9513 "PP: try COMMENT\n");break;
9514 case XML_PARSER_PROLOG:
9515 xmlGenericError(xmlGenericErrorContext,
9516 "PP: try PROLOG\n");break;
9517 case XML_PARSER_START_TAG:
9518 xmlGenericError(xmlGenericErrorContext,
9519 "PP: try START_TAG\n");break;
9520 case XML_PARSER_CONTENT:
9521 xmlGenericError(xmlGenericErrorContext,
9522 "PP: try CONTENT\n");break;
9523 case XML_PARSER_CDATA_SECTION:
9524 xmlGenericError(xmlGenericErrorContext,
9525 "PP: try CDATA_SECTION\n");break;
9526 case XML_PARSER_END_TAG:
9527 xmlGenericError(xmlGenericErrorContext,
9528 "PP: try END_TAG\n");break;
9529 case XML_PARSER_ENTITY_DECL:
9530 xmlGenericError(xmlGenericErrorContext,
9531 "PP: try ENTITY_DECL\n");break;
9532 case XML_PARSER_ENTITY_VALUE:
9533 xmlGenericError(xmlGenericErrorContext,
9534 "PP: try ENTITY_VALUE\n");break;
9535 case XML_PARSER_ATTRIBUTE_VALUE:
9536 xmlGenericError(xmlGenericErrorContext,
9537 "PP: try ATTRIBUTE_VALUE\n");break;
9538 case XML_PARSER_DTD:
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: try DTD\n");break;
9541 case XML_PARSER_EPILOG:
9542 xmlGenericError(xmlGenericErrorContext,
9543 "PP: try EPILOG\n");break;
9544 case XML_PARSER_PI:
9545 xmlGenericError(xmlGenericErrorContext,
9546 "PP: try PI\n");break;
9547 case XML_PARSER_IGNORE:
9548 xmlGenericError(xmlGenericErrorContext,
9549 "PP: try IGNORE\n");break;
9550 }
9551#endif
9552
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009553 if ((ctxt->input != NULL) &&
9554 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009555 xmlSHRINK(ctxt);
9556 ctxt->checkIndex = 0;
9557 }
9558 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009559
Daniel Veillarda880b122003-04-21 21:36:41 +00009560 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009561 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009562 return(0);
9563
9564
Owen Taylor3473f882001-02-23 17:55:21 +00009565 /*
9566 * Pop-up of finished entities.
9567 */
9568 while ((RAW == 0) && (ctxt->inputNr > 1))
9569 xmlPopInput(ctxt);
9570
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009571 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009572 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009573 avail = ctxt->input->length -
9574 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009575 else {
9576 /*
9577 * If we are operating on converted input, try to flush
9578 * remainng chars to avoid them stalling in the non-converted
9579 * buffer.
9580 */
9581 if ((ctxt->input->buf->raw != NULL) &&
9582 (ctxt->input->buf->raw->use > 0)) {
9583 int base = ctxt->input->base -
9584 ctxt->input->buf->buffer->content;
9585 int current = ctxt->input->cur - ctxt->input->base;
9586
9587 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9588 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9589 ctxt->input->cur = ctxt->input->base + current;
9590 ctxt->input->end =
9591 &ctxt->input->buf->buffer->content[
9592 ctxt->input->buf->buffer->use];
9593 }
9594 avail = ctxt->input->buf->buffer->use -
9595 (ctxt->input->cur - ctxt->input->base);
9596 }
Owen Taylor3473f882001-02-23 17:55:21 +00009597 if (avail < 1)
9598 goto done;
9599 switch (ctxt->instate) {
9600 case XML_PARSER_EOF:
9601 /*
9602 * Document parsing is done !
9603 */
9604 goto done;
9605 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009606 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9607 xmlChar start[4];
9608 xmlCharEncoding enc;
9609
9610 /*
9611 * Very first chars read from the document flow.
9612 */
9613 if (avail < 4)
9614 goto done;
9615
9616 /*
9617 * Get the 4 first bytes and decode the charset
9618 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009619 * plug some encoding conversion routines,
9620 * else xmlSwitchEncoding will set to (default)
9621 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009622 */
9623 start[0] = RAW;
9624 start[1] = NXT(1);
9625 start[2] = NXT(2);
9626 start[3] = NXT(3);
9627 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009628 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009629 break;
9630 }
Owen Taylor3473f882001-02-23 17:55:21 +00009631
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009632 if (avail < 2)
9633 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009634 cur = ctxt->input->cur[0];
9635 next = ctxt->input->cur[1];
9636 if (cur == 0) {
9637 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9638 ctxt->sax->setDocumentLocator(ctxt->userData,
9639 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009640 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009641 ctxt->instate = XML_PARSER_EOF;
9642#ifdef DEBUG_PUSH
9643 xmlGenericError(xmlGenericErrorContext,
9644 "PP: entering EOF\n");
9645#endif
9646 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9647 ctxt->sax->endDocument(ctxt->userData);
9648 goto done;
9649 }
9650 if ((cur == '<') && (next == '?')) {
9651 /* PI or XML decl */
9652 if (avail < 5) return(ret);
9653 if ((!terminate) &&
9654 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9655 return(ret);
9656 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9657 ctxt->sax->setDocumentLocator(ctxt->userData,
9658 &xmlDefaultSAXLocator);
9659 if ((ctxt->input->cur[2] == 'x') &&
9660 (ctxt->input->cur[3] == 'm') &&
9661 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009662 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009663 ret += 5;
9664#ifdef DEBUG_PUSH
9665 xmlGenericError(xmlGenericErrorContext,
9666 "PP: Parsing XML Decl\n");
9667#endif
9668 xmlParseXMLDecl(ctxt);
9669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9670 /*
9671 * The XML REC instructs us to stop parsing right
9672 * here
9673 */
9674 ctxt->instate = XML_PARSER_EOF;
9675 return(0);
9676 }
9677 ctxt->standalone = ctxt->input->standalone;
9678 if ((ctxt->encoding == NULL) &&
9679 (ctxt->input->encoding != NULL))
9680 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9681 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9682 (!ctxt->disableSAX))
9683 ctxt->sax->startDocument(ctxt->userData);
9684 ctxt->instate = XML_PARSER_MISC;
9685#ifdef DEBUG_PUSH
9686 xmlGenericError(xmlGenericErrorContext,
9687 "PP: entering MISC\n");
9688#endif
9689 } else {
9690 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9691 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9692 (!ctxt->disableSAX))
9693 ctxt->sax->startDocument(ctxt->userData);
9694 ctxt->instate = XML_PARSER_MISC;
9695#ifdef DEBUG_PUSH
9696 xmlGenericError(xmlGenericErrorContext,
9697 "PP: entering MISC\n");
9698#endif
9699 }
9700 } else {
9701 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9702 ctxt->sax->setDocumentLocator(ctxt->userData,
9703 &xmlDefaultSAXLocator);
9704 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009705 if (ctxt->version == NULL) {
9706 xmlErrMemory(ctxt, NULL);
9707 break;
9708 }
Owen Taylor3473f882001-02-23 17:55:21 +00009709 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9710 (!ctxt->disableSAX))
9711 ctxt->sax->startDocument(ctxt->userData);
9712 ctxt->instate = XML_PARSER_MISC;
9713#ifdef DEBUG_PUSH
9714 xmlGenericError(xmlGenericErrorContext,
9715 "PP: entering MISC\n");
9716#endif
9717 }
9718 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009719 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009720 const xmlChar *name;
9721 const xmlChar *prefix;
9722 const xmlChar *URI;
9723 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009724
9725 if ((avail < 2) && (ctxt->inputNr == 1))
9726 goto done;
9727 cur = ctxt->input->cur[0];
9728 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009729 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009730 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009731 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9732 ctxt->sax->endDocument(ctxt->userData);
9733 goto done;
9734 }
9735 if (!terminate) {
9736 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009737 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009738 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009739 goto done;
9740 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9741 goto done;
9742 }
9743 }
9744 if (ctxt->spaceNr == 0)
9745 spacePush(ctxt, -1);
9746 else
9747 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009748#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009749 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009750#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009751 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009752#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009753 else
9754 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009755#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009756 if (name == NULL) {
9757 spacePop(ctxt);
9758 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009759 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9760 ctxt->sax->endDocument(ctxt->userData);
9761 goto done;
9762 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009763#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009764 /*
9765 * [ VC: Root Element Type ]
9766 * The Name in the document type declaration must match
9767 * the element type of the root element.
9768 */
9769 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9770 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9771 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009772#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009773
9774 /*
9775 * Check for an Empty Element.
9776 */
9777 if ((RAW == '/') && (NXT(1) == '>')) {
9778 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009779
9780 if (ctxt->sax2) {
9781 if ((ctxt->sax != NULL) &&
9782 (ctxt->sax->endElementNs != NULL) &&
9783 (!ctxt->disableSAX))
9784 ctxt->sax->endElementNs(ctxt->userData, name,
9785 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009786 if (ctxt->nsNr - nsNr > 0)
9787 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009788#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009789 } else {
9790 if ((ctxt->sax != NULL) &&
9791 (ctxt->sax->endElement != NULL) &&
9792 (!ctxt->disableSAX))
9793 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009794#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009795 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009796 spacePop(ctxt);
9797 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009798 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009799 } else {
9800 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009801 }
9802 break;
9803 }
9804 if (RAW == '>') {
9805 NEXT;
9806 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009807 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009808 "Couldn't find end of Start Tag %s\n",
9809 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009810 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009811 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009812 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009813 if (ctxt->sax2)
9814 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009815#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009816 else
9817 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009818#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009819
Daniel Veillarda880b122003-04-21 21:36:41 +00009820 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009821 break;
9822 }
9823 case XML_PARSER_CONTENT: {
9824 const xmlChar *test;
9825 unsigned int cons;
9826 if ((avail < 2) && (ctxt->inputNr == 1))
9827 goto done;
9828 cur = ctxt->input->cur[0];
9829 next = ctxt->input->cur[1];
9830
9831 test = CUR_PTR;
9832 cons = ctxt->input->consumed;
9833 if ((cur == '<') && (next == '/')) {
9834 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009835 break;
9836 } else if ((cur == '<') && (next == '?')) {
9837 if ((!terminate) &&
9838 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9839 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009840 xmlParsePI(ctxt);
9841 } else if ((cur == '<') && (next != '!')) {
9842 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009843 break;
9844 } else if ((cur == '<') && (next == '!') &&
9845 (ctxt->input->cur[2] == '-') &&
9846 (ctxt->input->cur[3] == '-')) {
9847 if ((!terminate) &&
9848 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9849 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009850 xmlParseComment(ctxt);
9851 ctxt->instate = XML_PARSER_CONTENT;
9852 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9853 (ctxt->input->cur[2] == '[') &&
9854 (ctxt->input->cur[3] == 'C') &&
9855 (ctxt->input->cur[4] == 'D') &&
9856 (ctxt->input->cur[5] == 'A') &&
9857 (ctxt->input->cur[6] == 'T') &&
9858 (ctxt->input->cur[7] == 'A') &&
9859 (ctxt->input->cur[8] == '[')) {
9860 SKIP(9);
9861 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009862 break;
9863 } else if ((cur == '<') && (next == '!') &&
9864 (avail < 9)) {
9865 goto done;
9866 } else if (cur == '&') {
9867 if ((!terminate) &&
9868 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9869 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009870 xmlParseReference(ctxt);
9871 } else {
9872 /* TODO Avoid the extra copy, handle directly !!! */
9873 /*
9874 * Goal of the following test is:
9875 * - minimize calls to the SAX 'character' callback
9876 * when they are mergeable
9877 * - handle a problem for isBlank when we only parse
9878 * a sequence of blank chars and the next one is
9879 * not available to check against '<' presence.
9880 * - tries to homogenize the differences in SAX
9881 * callbacks between the push and pull versions
9882 * of the parser.
9883 */
9884 if ((ctxt->inputNr == 1) &&
9885 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9886 if (!terminate) {
9887 if (ctxt->progressive) {
9888 if ((lastlt == NULL) ||
9889 (ctxt->input->cur > lastlt))
9890 goto done;
9891 } else if (xmlParseLookupSequence(ctxt,
9892 '<', 0, 0) < 0) {
9893 goto done;
9894 }
9895 }
9896 }
9897 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009898 xmlParseCharData(ctxt, 0);
9899 }
9900 /*
9901 * Pop-up of finished entities.
9902 */
9903 while ((RAW == 0) && (ctxt->inputNr > 1))
9904 xmlPopInput(ctxt);
9905 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009906 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9907 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009908 ctxt->instate = XML_PARSER_EOF;
9909 break;
9910 }
9911 break;
9912 }
9913 case XML_PARSER_END_TAG:
9914 if (avail < 2)
9915 goto done;
9916 if (!terminate) {
9917 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009918 /* > can be found unescaped in attribute values */
9919 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009920 goto done;
9921 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9922 goto done;
9923 }
9924 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009925 if (ctxt->sax2) {
9926 xmlParseEndTag2(ctxt,
9927 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9928 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009929 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009930 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009931 }
9932#ifdef LIBXML_SAX1_ENABLED
9933 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009934 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009935#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009936 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009937 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009938 } else {
9939 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009940 }
9941 break;
9942 case XML_PARSER_CDATA_SECTION: {
9943 /*
9944 * The Push mode need to have the SAX callback for
9945 * cdataBlock merge back contiguous callbacks.
9946 */
9947 int base;
9948
9949 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9950 if (base < 0) {
9951 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009952 int tmp;
9953
9954 tmp = xmlCheckCdataPush(ctxt->input->cur,
9955 XML_PARSER_BIG_BUFFER_SIZE);
9956 if (tmp < 0) {
9957 tmp = -tmp;
9958 ctxt->input->cur += tmp;
9959 goto encoding_error;
9960 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9962 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009963 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009964 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009965 else if (ctxt->sax->characters != NULL)
9966 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009967 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009968 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009969 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009970 ctxt->checkIndex = 0;
9971 }
9972 goto done;
9973 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009974 int tmp;
9975
9976 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9977 if ((tmp < 0) || (tmp != base)) {
9978 tmp = -tmp;
9979 ctxt->input->cur += tmp;
9980 goto encoding_error;
9981 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009982 if ((ctxt->sax != NULL) && (base > 0) &&
9983 (!ctxt->disableSAX)) {
9984 if (ctxt->sax->cdataBlock != NULL)
9985 ctxt->sax->cdataBlock(ctxt->userData,
9986 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009987 else if (ctxt->sax->characters != NULL)
9988 ctxt->sax->characters(ctxt->userData,
9989 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009990 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009991 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009992 ctxt->checkIndex = 0;
9993 ctxt->instate = XML_PARSER_CONTENT;
9994#ifdef DEBUG_PUSH
9995 xmlGenericError(xmlGenericErrorContext,
9996 "PP: entering CONTENT\n");
9997#endif
9998 }
9999 break;
10000 }
Owen Taylor3473f882001-02-23 17:55:21 +000010001 case XML_PARSER_MISC:
10002 SKIP_BLANKS;
10003 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010004 avail = ctxt->input->length -
10005 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010006 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010007 avail = ctxt->input->buf->buffer->use -
10008 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010009 if (avail < 2)
10010 goto done;
10011 cur = ctxt->input->cur[0];
10012 next = ctxt->input->cur[1];
10013 if ((cur == '<') && (next == '?')) {
10014 if ((!terminate) &&
10015 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10016 goto done;
10017#ifdef DEBUG_PUSH
10018 xmlGenericError(xmlGenericErrorContext,
10019 "PP: Parsing PI\n");
10020#endif
10021 xmlParsePI(ctxt);
10022 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010023 (ctxt->input->cur[2] == '-') &&
10024 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010025 if ((!terminate) &&
10026 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10027 goto done;
10028#ifdef DEBUG_PUSH
10029 xmlGenericError(xmlGenericErrorContext,
10030 "PP: Parsing Comment\n");
10031#endif
10032 xmlParseComment(ctxt);
10033 ctxt->instate = XML_PARSER_MISC;
10034 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010035 (ctxt->input->cur[2] == 'D') &&
10036 (ctxt->input->cur[3] == 'O') &&
10037 (ctxt->input->cur[4] == 'C') &&
10038 (ctxt->input->cur[5] == 'T') &&
10039 (ctxt->input->cur[6] == 'Y') &&
10040 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010041 (ctxt->input->cur[8] == 'E')) {
10042 if ((!terminate) &&
10043 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10044 goto done;
10045#ifdef DEBUG_PUSH
10046 xmlGenericError(xmlGenericErrorContext,
10047 "PP: Parsing internal subset\n");
10048#endif
10049 ctxt->inSubset = 1;
10050 xmlParseDocTypeDecl(ctxt);
10051 if (RAW == '[') {
10052 ctxt->instate = XML_PARSER_DTD;
10053#ifdef DEBUG_PUSH
10054 xmlGenericError(xmlGenericErrorContext,
10055 "PP: entering DTD\n");
10056#endif
10057 } else {
10058 /*
10059 * Create and update the external subset.
10060 */
10061 ctxt->inSubset = 2;
10062 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10063 (ctxt->sax->externalSubset != NULL))
10064 ctxt->sax->externalSubset(ctxt->userData,
10065 ctxt->intSubName, ctxt->extSubSystem,
10066 ctxt->extSubURI);
10067 ctxt->inSubset = 0;
10068 ctxt->instate = XML_PARSER_PROLOG;
10069#ifdef DEBUG_PUSH
10070 xmlGenericError(xmlGenericErrorContext,
10071 "PP: entering PROLOG\n");
10072#endif
10073 }
10074 } else if ((cur == '<') && (next == '!') &&
10075 (avail < 9)) {
10076 goto done;
10077 } else {
10078 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010079 ctxt->progressive = 1;
10080 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010081#ifdef DEBUG_PUSH
10082 xmlGenericError(xmlGenericErrorContext,
10083 "PP: entering START_TAG\n");
10084#endif
10085 }
10086 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010087 case XML_PARSER_PROLOG:
10088 SKIP_BLANKS;
10089 if (ctxt->input->buf == NULL)
10090 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10091 else
10092 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10093 if (avail < 2)
10094 goto done;
10095 cur = ctxt->input->cur[0];
10096 next = ctxt->input->cur[1];
10097 if ((cur == '<') && (next == '?')) {
10098 if ((!terminate) &&
10099 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10100 goto done;
10101#ifdef DEBUG_PUSH
10102 xmlGenericError(xmlGenericErrorContext,
10103 "PP: Parsing PI\n");
10104#endif
10105 xmlParsePI(ctxt);
10106 } else if ((cur == '<') && (next == '!') &&
10107 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10108 if ((!terminate) &&
10109 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10110 goto done;
10111#ifdef DEBUG_PUSH
10112 xmlGenericError(xmlGenericErrorContext,
10113 "PP: Parsing Comment\n");
10114#endif
10115 xmlParseComment(ctxt);
10116 ctxt->instate = XML_PARSER_PROLOG;
10117 } else if ((cur == '<') && (next == '!') &&
10118 (avail < 4)) {
10119 goto done;
10120 } else {
10121 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010122 if (ctxt->progressive == 0)
10123 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010124 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010125#ifdef DEBUG_PUSH
10126 xmlGenericError(xmlGenericErrorContext,
10127 "PP: entering START_TAG\n");
10128#endif
10129 }
10130 break;
10131 case XML_PARSER_EPILOG:
10132 SKIP_BLANKS;
10133 if (ctxt->input->buf == NULL)
10134 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10135 else
10136 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10137 if (avail < 2)
10138 goto done;
10139 cur = ctxt->input->cur[0];
10140 next = ctxt->input->cur[1];
10141 if ((cur == '<') && (next == '?')) {
10142 if ((!terminate) &&
10143 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10144 goto done;
10145#ifdef DEBUG_PUSH
10146 xmlGenericError(xmlGenericErrorContext,
10147 "PP: Parsing PI\n");
10148#endif
10149 xmlParsePI(ctxt);
10150 ctxt->instate = XML_PARSER_EPILOG;
10151 } else if ((cur == '<') && (next == '!') &&
10152 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10153 if ((!terminate) &&
10154 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10155 goto done;
10156#ifdef DEBUG_PUSH
10157 xmlGenericError(xmlGenericErrorContext,
10158 "PP: Parsing Comment\n");
10159#endif
10160 xmlParseComment(ctxt);
10161 ctxt->instate = XML_PARSER_EPILOG;
10162 } else if ((cur == '<') && (next == '!') &&
10163 (avail < 4)) {
10164 goto done;
10165 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010166 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010167 ctxt->instate = XML_PARSER_EOF;
10168#ifdef DEBUG_PUSH
10169 xmlGenericError(xmlGenericErrorContext,
10170 "PP: entering EOF\n");
10171#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010172 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010173 ctxt->sax->endDocument(ctxt->userData);
10174 goto done;
10175 }
10176 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010177 case XML_PARSER_DTD: {
10178 /*
10179 * Sorry but progressive parsing of the internal subset
10180 * is not expected to be supported. We first check that
10181 * the full content of the internal subset is available and
10182 * the parsing is launched only at that point.
10183 * Internal subset ends up with "']' S? '>'" in an unescaped
10184 * section and not in a ']]>' sequence which are conditional
10185 * sections (whoever argued to keep that crap in XML deserve
10186 * a place in hell !).
10187 */
10188 int base, i;
10189 xmlChar *buf;
10190 xmlChar quote = 0;
10191
10192 base = ctxt->input->cur - ctxt->input->base;
10193 if (base < 0) return(0);
10194 if (ctxt->checkIndex > base)
10195 base = ctxt->checkIndex;
10196 buf = ctxt->input->buf->buffer->content;
10197 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10198 base++) {
10199 if (quote != 0) {
10200 if (buf[base] == quote)
10201 quote = 0;
10202 continue;
10203 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010204 if ((quote == 0) && (buf[base] == '<')) {
10205 int found = 0;
10206 /* special handling of comments */
10207 if (((unsigned int) base + 4 <
10208 ctxt->input->buf->buffer->use) &&
10209 (buf[base + 1] == '!') &&
10210 (buf[base + 2] == '-') &&
10211 (buf[base + 3] == '-')) {
10212 for (;(unsigned int) base + 3 <
10213 ctxt->input->buf->buffer->use; base++) {
10214 if ((buf[base] == '-') &&
10215 (buf[base + 1] == '-') &&
10216 (buf[base + 2] == '>')) {
10217 found = 1;
10218 base += 2;
10219 break;
10220 }
10221 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010222 if (!found) {
10223#if 0
10224 fprintf(stderr, "unfinished comment\n");
10225#endif
10226 break; /* for */
10227 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010228 continue;
10229 }
10230 }
Owen Taylor3473f882001-02-23 17:55:21 +000010231 if (buf[base] == '"') {
10232 quote = '"';
10233 continue;
10234 }
10235 if (buf[base] == '\'') {
10236 quote = '\'';
10237 continue;
10238 }
10239 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010240#if 0
10241 fprintf(stderr, "%c%c%c%c: ", buf[base],
10242 buf[base + 1], buf[base + 2], buf[base + 3]);
10243#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010244 if ((unsigned int) base +1 >=
10245 ctxt->input->buf->buffer->use)
10246 break;
10247 if (buf[base + 1] == ']') {
10248 /* conditional crap, skip both ']' ! */
10249 base++;
10250 continue;
10251 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010252 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010253 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10254 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010255 if (buf[base + i] == '>') {
10256#if 0
10257 fprintf(stderr, "found\n");
10258#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010259 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010260 }
10261 if (!IS_BLANK_CH(buf[base + i])) {
10262#if 0
10263 fprintf(stderr, "not found\n");
10264#endif
10265 goto not_end_of_int_subset;
10266 }
Owen Taylor3473f882001-02-23 17:55:21 +000010267 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010268#if 0
10269 fprintf(stderr, "end of stream\n");
10270#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010271 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010272
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010274not_end_of_int_subset:
10275 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010276 }
10277 /*
10278 * We didn't find the end of the Internal subset
10279 */
Owen Taylor3473f882001-02-23 17:55:21 +000010280#ifdef DEBUG_PUSH
10281 if (next == 0)
10282 xmlGenericError(xmlGenericErrorContext,
10283 "PP: lookup of int subset end filed\n");
10284#endif
10285 goto done;
10286
10287found_end_int_subset:
10288 xmlParseInternalSubset(ctxt);
10289 ctxt->inSubset = 2;
10290 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10291 (ctxt->sax->externalSubset != NULL))
10292 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10293 ctxt->extSubSystem, ctxt->extSubURI);
10294 ctxt->inSubset = 0;
10295 ctxt->instate = XML_PARSER_PROLOG;
10296 ctxt->checkIndex = 0;
10297#ifdef DEBUG_PUSH
10298 xmlGenericError(xmlGenericErrorContext,
10299 "PP: entering PROLOG\n");
10300#endif
10301 break;
10302 }
10303 case XML_PARSER_COMMENT:
10304 xmlGenericError(xmlGenericErrorContext,
10305 "PP: internal error, state == COMMENT\n");
10306 ctxt->instate = XML_PARSER_CONTENT;
10307#ifdef DEBUG_PUSH
10308 xmlGenericError(xmlGenericErrorContext,
10309 "PP: entering CONTENT\n");
10310#endif
10311 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010312 case XML_PARSER_IGNORE:
10313 xmlGenericError(xmlGenericErrorContext,
10314 "PP: internal error, state == IGNORE");
10315 ctxt->instate = XML_PARSER_DTD;
10316#ifdef DEBUG_PUSH
10317 xmlGenericError(xmlGenericErrorContext,
10318 "PP: entering DTD\n");
10319#endif
10320 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010321 case XML_PARSER_PI:
10322 xmlGenericError(xmlGenericErrorContext,
10323 "PP: internal error, state == PI\n");
10324 ctxt->instate = XML_PARSER_CONTENT;
10325#ifdef DEBUG_PUSH
10326 xmlGenericError(xmlGenericErrorContext,
10327 "PP: entering CONTENT\n");
10328#endif
10329 break;
10330 case XML_PARSER_ENTITY_DECL:
10331 xmlGenericError(xmlGenericErrorContext,
10332 "PP: internal error, state == ENTITY_DECL\n");
10333 ctxt->instate = XML_PARSER_DTD;
10334#ifdef DEBUG_PUSH
10335 xmlGenericError(xmlGenericErrorContext,
10336 "PP: entering DTD\n");
10337#endif
10338 break;
10339 case XML_PARSER_ENTITY_VALUE:
10340 xmlGenericError(xmlGenericErrorContext,
10341 "PP: internal error, state == ENTITY_VALUE\n");
10342 ctxt->instate = XML_PARSER_CONTENT;
10343#ifdef DEBUG_PUSH
10344 xmlGenericError(xmlGenericErrorContext,
10345 "PP: entering DTD\n");
10346#endif
10347 break;
10348 case XML_PARSER_ATTRIBUTE_VALUE:
10349 xmlGenericError(xmlGenericErrorContext,
10350 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10351 ctxt->instate = XML_PARSER_START_TAG;
10352#ifdef DEBUG_PUSH
10353 xmlGenericError(xmlGenericErrorContext,
10354 "PP: entering START_TAG\n");
10355#endif
10356 break;
10357 case XML_PARSER_SYSTEM_LITERAL:
10358 xmlGenericError(xmlGenericErrorContext,
10359 "PP: internal error, state == SYSTEM_LITERAL\n");
10360 ctxt->instate = XML_PARSER_START_TAG;
10361#ifdef DEBUG_PUSH
10362 xmlGenericError(xmlGenericErrorContext,
10363 "PP: entering START_TAG\n");
10364#endif
10365 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010366 case XML_PARSER_PUBLIC_LITERAL:
10367 xmlGenericError(xmlGenericErrorContext,
10368 "PP: internal error, state == PUBLIC_LITERAL\n");
10369 ctxt->instate = XML_PARSER_START_TAG;
10370#ifdef DEBUG_PUSH
10371 xmlGenericError(xmlGenericErrorContext,
10372 "PP: entering START_TAG\n");
10373#endif
10374 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010375 }
10376 }
10377done:
10378#ifdef DEBUG_PUSH
10379 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10380#endif
10381 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010382encoding_error:
10383 {
10384 char buffer[150];
10385
10386 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10387 ctxt->input->cur[0], ctxt->input->cur[1],
10388 ctxt->input->cur[2], ctxt->input->cur[3]);
10389 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10390 "Input is not proper UTF-8, indicate encoding !\n%s",
10391 BAD_CAST buffer, NULL);
10392 }
10393 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010394}
10395
10396/**
Owen Taylor3473f882001-02-23 17:55:21 +000010397 * xmlParseChunk:
10398 * @ctxt: an XML parser context
10399 * @chunk: an char array
10400 * @size: the size in byte of the chunk
10401 * @terminate: last chunk indicator
10402 *
10403 * Parse a Chunk of memory
10404 *
10405 * Returns zero if no error, the xmlParserErrors otherwise.
10406 */
10407int
10408xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10409 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010410 if (ctxt == NULL)
10411 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010412 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010413 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010414 if (ctxt->instate == XML_PARSER_START)
10415 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010416 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10417 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10418 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10419 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010420 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010421
William M. Bracka3215c72004-07-31 16:24:01 +000010422 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10423 if (res < 0) {
10424 ctxt->errNo = XML_PARSER_EOF;
10425 ctxt->disableSAX = 1;
10426 return (XML_PARSER_EOF);
10427 }
Owen Taylor3473f882001-02-23 17:55:21 +000010428 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10429 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010430 ctxt->input->end =
10431 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010432#ifdef DEBUG_PUSH
10433 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10434#endif
10435
Owen Taylor3473f882001-02-23 17:55:21 +000010436 } else if (ctxt->instate != XML_PARSER_EOF) {
10437 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10438 xmlParserInputBufferPtr in = ctxt->input->buf;
10439 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10440 (in->raw != NULL)) {
10441 int nbchars;
10442
10443 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10444 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010445 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010446 xmlGenericError(xmlGenericErrorContext,
10447 "xmlParseChunk: encoder error\n");
10448 return(XML_ERR_INVALID_ENCODING);
10449 }
10450 }
10451 }
10452 }
10453 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010454 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010455 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 if (terminate) {
10457 /*
10458 * Check for termination
10459 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010460 int avail = 0;
10461
10462 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010463 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010464 avail = ctxt->input->length -
10465 (ctxt->input->cur - ctxt->input->base);
10466 else
10467 avail = ctxt->input->buf->buffer->use -
10468 (ctxt->input->cur - ctxt->input->base);
10469 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010470
Owen Taylor3473f882001-02-23 17:55:21 +000010471 if ((ctxt->instate != XML_PARSER_EOF) &&
10472 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010473 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010474 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010475 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010476 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010477 }
Owen Taylor3473f882001-02-23 17:55:21 +000010478 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010479 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010480 ctxt->sax->endDocument(ctxt->userData);
10481 }
10482 ctxt->instate = XML_PARSER_EOF;
10483 }
10484 return((xmlParserErrors) ctxt->errNo);
10485}
10486
10487/************************************************************************
10488 * *
10489 * I/O front end functions to the parser *
10490 * *
10491 ************************************************************************/
10492
10493/**
Owen Taylor3473f882001-02-23 17:55:21 +000010494 * xmlCreatePushParserCtxt:
10495 * @sax: a SAX handler
10496 * @user_data: The user data returned on SAX callbacks
10497 * @chunk: a pointer to an array of chars
10498 * @size: number of chars in the array
10499 * @filename: an optional file name or URI
10500 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010501 * Create a parser context for using the XML parser in push mode.
10502 * If @buffer and @size are non-NULL, the data is used to detect
10503 * the encoding. The remaining characters will be parsed so they
10504 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010505 * To allow content encoding detection, @size should be >= 4
10506 * The value of @filename is used for fetching external entities
10507 * and error/warning reports.
10508 *
10509 * Returns the new parser context or NULL
10510 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010511
Owen Taylor3473f882001-02-23 17:55:21 +000010512xmlParserCtxtPtr
10513xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10514 const char *chunk, int size, const char *filename) {
10515 xmlParserCtxtPtr ctxt;
10516 xmlParserInputPtr inputStream;
10517 xmlParserInputBufferPtr buf;
10518 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10519
10520 /*
10521 * plug some encoding conversion routines
10522 */
10523 if ((chunk != NULL) && (size >= 4))
10524 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10525
10526 buf = xmlAllocParserInputBuffer(enc);
10527 if (buf == NULL) return(NULL);
10528
10529 ctxt = xmlNewParserCtxt();
10530 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010531 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010532 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010533 return(NULL);
10534 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010535 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010536 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10537 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010538 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010539 xmlFreeParserInputBuffer(buf);
10540 xmlFreeParserCtxt(ctxt);
10541 return(NULL);
10542 }
Owen Taylor3473f882001-02-23 17:55:21 +000010543 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010544#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010545 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010546#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010547 xmlFree(ctxt->sax);
10548 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10549 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010550 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010551 xmlFreeParserInputBuffer(buf);
10552 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010553 return(NULL);
10554 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010555 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10556 if (sax->initialized == XML_SAX2_MAGIC)
10557 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10558 else
10559 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010560 if (user_data != NULL)
10561 ctxt->userData = user_data;
10562 }
10563 if (filename == NULL) {
10564 ctxt->directory = NULL;
10565 } else {
10566 ctxt->directory = xmlParserGetDirectory(filename);
10567 }
10568
10569 inputStream = xmlNewInputStream(ctxt);
10570 if (inputStream == NULL) {
10571 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010572 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010573 return(NULL);
10574 }
10575
10576 if (filename == NULL)
10577 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010578 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010579 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010580 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010581 if (inputStream->filename == NULL) {
10582 xmlFreeParserCtxt(ctxt);
10583 xmlFreeParserInputBuffer(buf);
10584 return(NULL);
10585 }
10586 }
Owen Taylor3473f882001-02-23 17:55:21 +000010587 inputStream->buf = buf;
10588 inputStream->base = inputStream->buf->buffer->content;
10589 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010590 inputStream->end =
10591 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010592
10593 inputPush(ctxt, inputStream);
10594
William M. Brack3a1cd212005-02-11 14:35:54 +000010595 /*
10596 * If the caller didn't provide an initial 'chunk' for determining
10597 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10598 * that it can be automatically determined later
10599 */
10600 if ((size == 0) || (chunk == NULL)) {
10601 ctxt->charset = XML_CHAR_ENCODING_NONE;
10602 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010603 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10604 int cur = ctxt->input->cur - ctxt->input->base;
10605
Owen Taylor3473f882001-02-23 17:55:21 +000010606 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010607
10608 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10609 ctxt->input->cur = ctxt->input->base + cur;
10610 ctxt->input->end =
10611 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010612#ifdef DEBUG_PUSH
10613 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10614#endif
10615 }
10616
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010617 if (enc != XML_CHAR_ENCODING_NONE) {
10618 xmlSwitchEncoding(ctxt, enc);
10619 }
10620
Owen Taylor3473f882001-02-23 17:55:21 +000010621 return(ctxt);
10622}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010623#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010624
10625/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010626 * xmlStopParser:
10627 * @ctxt: an XML parser context
10628 *
10629 * Blocks further parser processing
10630 */
10631void
10632xmlStopParser(xmlParserCtxtPtr ctxt) {
10633 if (ctxt == NULL)
10634 return;
10635 ctxt->instate = XML_PARSER_EOF;
10636 ctxt->disableSAX = 1;
10637 if (ctxt->input != NULL) {
10638 ctxt->input->cur = BAD_CAST"";
10639 ctxt->input->base = ctxt->input->cur;
10640 }
10641}
10642
10643/**
Owen Taylor3473f882001-02-23 17:55:21 +000010644 * xmlCreateIOParserCtxt:
10645 * @sax: a SAX handler
10646 * @user_data: The user data returned on SAX callbacks
10647 * @ioread: an I/O read function
10648 * @ioclose: an I/O close function
10649 * @ioctx: an I/O handler
10650 * @enc: the charset encoding if known
10651 *
10652 * Create a parser context for using the XML parser with an existing
10653 * I/O stream
10654 *
10655 * Returns the new parser context or NULL
10656 */
10657xmlParserCtxtPtr
10658xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10659 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10660 void *ioctx, xmlCharEncoding enc) {
10661 xmlParserCtxtPtr ctxt;
10662 xmlParserInputPtr inputStream;
10663 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010664
10665 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010666
10667 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10668 if (buf == NULL) return(NULL);
10669
10670 ctxt = xmlNewParserCtxt();
10671 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010672 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010673 return(NULL);
10674 }
10675 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010676#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010677 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010678#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010679 xmlFree(ctxt->sax);
10680 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10681 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010682 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010683 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010684 return(NULL);
10685 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010686 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10687 if (sax->initialized == XML_SAX2_MAGIC)
10688 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10689 else
10690 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010691 if (user_data != NULL)
10692 ctxt->userData = user_data;
10693 }
10694
10695 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10696 if (inputStream == NULL) {
10697 xmlFreeParserCtxt(ctxt);
10698 return(NULL);
10699 }
10700 inputPush(ctxt, inputStream);
10701
10702 return(ctxt);
10703}
10704
Daniel Veillard4432df22003-09-28 18:58:27 +000010705#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010706/************************************************************************
10707 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010708 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010709 * *
10710 ************************************************************************/
10711
10712/**
10713 * xmlIOParseDTD:
10714 * @sax: the SAX handler block or NULL
10715 * @input: an Input Buffer
10716 * @enc: the charset encoding if known
10717 *
10718 * Load and parse a DTD
10719 *
10720 * Returns the resulting xmlDtdPtr or NULL in case of error.
10721 * @input will be freed at parsing end.
10722 */
10723
10724xmlDtdPtr
10725xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10726 xmlCharEncoding enc) {
10727 xmlDtdPtr ret = NULL;
10728 xmlParserCtxtPtr ctxt;
10729 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010730 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010731
10732 if (input == NULL)
10733 return(NULL);
10734
10735 ctxt = xmlNewParserCtxt();
10736 if (ctxt == NULL) {
10737 return(NULL);
10738 }
10739
10740 /*
10741 * Set-up the SAX context
10742 */
10743 if (sax != NULL) {
10744 if (ctxt->sax != NULL)
10745 xmlFree(ctxt->sax);
10746 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010747 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010748 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010749 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010750
10751 /*
10752 * generate a parser input from the I/O handler
10753 */
10754
Daniel Veillard43caefb2003-12-07 19:32:22 +000010755 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010756 if (pinput == NULL) {
10757 if (sax != NULL) ctxt->sax = NULL;
10758 xmlFreeParserCtxt(ctxt);
10759 return(NULL);
10760 }
10761
10762 /*
10763 * plug some encoding conversion routines here.
10764 */
10765 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010766 if (enc != XML_CHAR_ENCODING_NONE) {
10767 xmlSwitchEncoding(ctxt, enc);
10768 }
Owen Taylor3473f882001-02-23 17:55:21 +000010769
10770 pinput->filename = NULL;
10771 pinput->line = 1;
10772 pinput->col = 1;
10773 pinput->base = ctxt->input->cur;
10774 pinput->cur = ctxt->input->cur;
10775 pinput->free = NULL;
10776
10777 /*
10778 * let's parse that entity knowing it's an external subset.
10779 */
10780 ctxt->inSubset = 2;
10781 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10782 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10783 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010784
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010785 if ((enc == XML_CHAR_ENCODING_NONE) &&
10786 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010787 /*
10788 * Get the 4 first bytes and decode the charset
10789 * if enc != XML_CHAR_ENCODING_NONE
10790 * plug some encoding conversion routines.
10791 */
10792 start[0] = RAW;
10793 start[1] = NXT(1);
10794 start[2] = NXT(2);
10795 start[3] = NXT(3);
10796 enc = xmlDetectCharEncoding(start, 4);
10797 if (enc != XML_CHAR_ENCODING_NONE) {
10798 xmlSwitchEncoding(ctxt, enc);
10799 }
10800 }
10801
Owen Taylor3473f882001-02-23 17:55:21 +000010802 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10803
10804 if (ctxt->myDoc != NULL) {
10805 if (ctxt->wellFormed) {
10806 ret = ctxt->myDoc->extSubset;
10807 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010808 if (ret != NULL) {
10809 xmlNodePtr tmp;
10810
10811 ret->doc = NULL;
10812 tmp = ret->children;
10813 while (tmp != NULL) {
10814 tmp->doc = NULL;
10815 tmp = tmp->next;
10816 }
10817 }
Owen Taylor3473f882001-02-23 17:55:21 +000010818 } else {
10819 ret = NULL;
10820 }
10821 xmlFreeDoc(ctxt->myDoc);
10822 ctxt->myDoc = NULL;
10823 }
10824 if (sax != NULL) ctxt->sax = NULL;
10825 xmlFreeParserCtxt(ctxt);
10826
10827 return(ret);
10828}
10829
10830/**
10831 * xmlSAXParseDTD:
10832 * @sax: the SAX handler block
10833 * @ExternalID: a NAME* containing the External ID of the DTD
10834 * @SystemID: a NAME* containing the URL to the DTD
10835 *
10836 * Load and parse an external subset.
10837 *
10838 * Returns the resulting xmlDtdPtr or NULL in case of error.
10839 */
10840
10841xmlDtdPtr
10842xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10843 const xmlChar *SystemID) {
10844 xmlDtdPtr ret = NULL;
10845 xmlParserCtxtPtr ctxt;
10846 xmlParserInputPtr input = NULL;
10847 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010848 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010849
10850 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10851
10852 ctxt = xmlNewParserCtxt();
10853 if (ctxt == NULL) {
10854 return(NULL);
10855 }
10856
10857 /*
10858 * Set-up the SAX context
10859 */
10860 if (sax != NULL) {
10861 if (ctxt->sax != NULL)
10862 xmlFree(ctxt->sax);
10863 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010864 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010865 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010866
10867 /*
10868 * Canonicalise the system ID
10869 */
10870 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010871 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010872 xmlFreeParserCtxt(ctxt);
10873 return(NULL);
10874 }
Owen Taylor3473f882001-02-23 17:55:21 +000010875
10876 /*
10877 * Ask the Entity resolver to load the damn thing
10878 */
10879
10880 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010881 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010882 if (input == NULL) {
10883 if (sax != NULL) ctxt->sax = NULL;
10884 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010885 if (systemIdCanonic != NULL)
10886 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010887 return(NULL);
10888 }
10889
10890 /*
10891 * plug some encoding conversion routines here.
10892 */
10893 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010894 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10895 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10896 xmlSwitchEncoding(ctxt, enc);
10897 }
Owen Taylor3473f882001-02-23 17:55:21 +000010898
10899 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010900 input->filename = (char *) systemIdCanonic;
10901 else
10902 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010903 input->line = 1;
10904 input->col = 1;
10905 input->base = ctxt->input->cur;
10906 input->cur = ctxt->input->cur;
10907 input->free = NULL;
10908
10909 /*
10910 * let's parse that entity knowing it's an external subset.
10911 */
10912 ctxt->inSubset = 2;
10913 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10914 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10915 ExternalID, SystemID);
10916 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10917
10918 if (ctxt->myDoc != NULL) {
10919 if (ctxt->wellFormed) {
10920 ret = ctxt->myDoc->extSubset;
10921 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010922 if (ret != NULL) {
10923 xmlNodePtr tmp;
10924
10925 ret->doc = NULL;
10926 tmp = ret->children;
10927 while (tmp != NULL) {
10928 tmp->doc = NULL;
10929 tmp = tmp->next;
10930 }
10931 }
Owen Taylor3473f882001-02-23 17:55:21 +000010932 } else {
10933 ret = NULL;
10934 }
10935 xmlFreeDoc(ctxt->myDoc);
10936 ctxt->myDoc = NULL;
10937 }
10938 if (sax != NULL) ctxt->sax = NULL;
10939 xmlFreeParserCtxt(ctxt);
10940
10941 return(ret);
10942}
10943
Daniel Veillard4432df22003-09-28 18:58:27 +000010944
Owen Taylor3473f882001-02-23 17:55:21 +000010945/**
10946 * xmlParseDTD:
10947 * @ExternalID: a NAME* containing the External ID of the DTD
10948 * @SystemID: a NAME* containing the URL to the DTD
10949 *
10950 * Load and parse an external subset.
10951 *
10952 * Returns the resulting xmlDtdPtr or NULL in case of error.
10953 */
10954
10955xmlDtdPtr
10956xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10957 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10958}
Daniel Veillard4432df22003-09-28 18:58:27 +000010959#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010960
10961/************************************************************************
10962 * *
10963 * Front ends when parsing an Entity *
10964 * *
10965 ************************************************************************/
10966
10967/**
Owen Taylor3473f882001-02-23 17:55:21 +000010968 * xmlParseCtxtExternalEntity:
10969 * @ctx: the existing parsing context
10970 * @URL: the URL for the entity to load
10971 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010972 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010973 *
10974 * Parse an external general entity within an existing parsing context
10975 * An external general parsed entity is well-formed if it matches the
10976 * production labeled extParsedEnt.
10977 *
10978 * [78] extParsedEnt ::= TextDecl? content
10979 *
10980 * Returns 0 if the entity is well formed, -1 in case of args problem and
10981 * the parser error code otherwise
10982 */
10983
10984int
10985xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010986 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010987 xmlParserCtxtPtr ctxt;
10988 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010989 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010990 xmlSAXHandlerPtr oldsax = NULL;
10991 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010992 xmlChar start[4];
10993 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010994
Daniel Veillardce682bc2004-11-05 17:22:25 +000010995 if (ctx == NULL) return(-1);
10996
Owen Taylor3473f882001-02-23 17:55:21 +000010997 if (ctx->depth > 40) {
10998 return(XML_ERR_ENTITY_LOOP);
10999 }
11000
Daniel Veillardcda96922001-08-21 10:56:31 +000011001 if (lst != NULL)
11002 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011003 if ((URL == NULL) && (ID == NULL))
11004 return(-1);
11005 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11006 return(-1);
11007
11008
11009 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11010 if (ctxt == NULL) return(-1);
11011 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011012 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011013 oldsax = ctxt->sax;
11014 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011015 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011016 newDoc = xmlNewDoc(BAD_CAST "1.0");
11017 if (newDoc == NULL) {
11018 xmlFreeParserCtxt(ctxt);
11019 return(-1);
11020 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011021 if (ctx->myDoc->dict) {
11022 newDoc->dict = ctx->myDoc->dict;
11023 xmlDictReference(newDoc->dict);
11024 }
Owen Taylor3473f882001-02-23 17:55:21 +000011025 if (ctx->myDoc != NULL) {
11026 newDoc->intSubset = ctx->myDoc->intSubset;
11027 newDoc->extSubset = ctx->myDoc->extSubset;
11028 }
11029 if (ctx->myDoc->URL != NULL) {
11030 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11031 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011032 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11033 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011034 ctxt->sax = oldsax;
11035 xmlFreeParserCtxt(ctxt);
11036 newDoc->intSubset = NULL;
11037 newDoc->extSubset = NULL;
11038 xmlFreeDoc(newDoc);
11039 return(-1);
11040 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011041 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011042 nodePush(ctxt, newDoc->children);
11043 if (ctx->myDoc == NULL) {
11044 ctxt->myDoc = newDoc;
11045 } else {
11046 ctxt->myDoc = ctx->myDoc;
11047 newDoc->children->doc = ctx->myDoc;
11048 }
11049
Daniel Veillard87a764e2001-06-20 17:41:10 +000011050 /*
11051 * Get the 4 first bytes and decode the charset
11052 * if enc != XML_CHAR_ENCODING_NONE
11053 * plug some encoding conversion routines.
11054 */
11055 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011056 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11057 start[0] = RAW;
11058 start[1] = NXT(1);
11059 start[2] = NXT(2);
11060 start[3] = NXT(3);
11061 enc = xmlDetectCharEncoding(start, 4);
11062 if (enc != XML_CHAR_ENCODING_NONE) {
11063 xmlSwitchEncoding(ctxt, enc);
11064 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011065 }
11066
Owen Taylor3473f882001-02-23 17:55:21 +000011067 /*
11068 * Parse a possible text declaration first
11069 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011070 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011071 xmlParseTextDecl(ctxt);
11072 }
11073
11074 /*
11075 * Doing validity checking on chunk doesn't make sense
11076 */
11077 ctxt->instate = XML_PARSER_CONTENT;
11078 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011079 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011080 ctxt->loadsubset = ctx->loadsubset;
11081 ctxt->depth = ctx->depth + 1;
11082 ctxt->replaceEntities = ctx->replaceEntities;
11083 if (ctxt->validate) {
11084 ctxt->vctxt.error = ctx->vctxt.error;
11085 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011086 } else {
11087 ctxt->vctxt.error = NULL;
11088 ctxt->vctxt.warning = NULL;
11089 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011090 ctxt->vctxt.nodeTab = NULL;
11091 ctxt->vctxt.nodeNr = 0;
11092 ctxt->vctxt.nodeMax = 0;
11093 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011094 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11095 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011096 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11097 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11098 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011099 ctxt->dictNames = ctx->dictNames;
11100 ctxt->attsDefault = ctx->attsDefault;
11101 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011102 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011103
11104 xmlParseContent(ctxt);
11105
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011106 ctx->validate = ctxt->validate;
11107 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011108 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011109 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011110 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011111 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011112 }
11113 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011114 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011115 }
11116
11117 if (!ctxt->wellFormed) {
11118 if (ctxt->errNo == 0)
11119 ret = 1;
11120 else
11121 ret = ctxt->errNo;
11122 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011123 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011124 xmlNodePtr cur;
11125
11126 /*
11127 * Return the newly created nodeset after unlinking it from
11128 * they pseudo parent.
11129 */
11130 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011131 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011132 while (cur != NULL) {
11133 cur->parent = NULL;
11134 cur = cur->next;
11135 }
11136 newDoc->children->children = NULL;
11137 }
11138 ret = 0;
11139 }
11140 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011141 ctxt->dict = NULL;
11142 ctxt->attsDefault = NULL;
11143 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011144 xmlFreeParserCtxt(ctxt);
11145 newDoc->intSubset = NULL;
11146 newDoc->extSubset = NULL;
11147 xmlFreeDoc(newDoc);
11148
11149 return(ret);
11150}
11151
11152/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011153 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011154 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011155 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011156 * @sax: the SAX handler bloc (possibly NULL)
11157 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11158 * @depth: Used for loop detection, use 0
11159 * @URL: the URL for the entity to load
11160 * @ID: the System ID for the entity to load
11161 * @list: the return value for the set of parsed nodes
11162 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011163 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011164 *
11165 * Returns 0 if the entity is well formed, -1 in case of args problem and
11166 * the parser error code otherwise
11167 */
11168
Daniel Veillard7d515752003-09-26 19:12:37 +000011169static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011170xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11171 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011172 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011173 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011174 xmlParserCtxtPtr ctxt;
11175 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011176 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011177 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011178 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011179 xmlChar start[4];
11180 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011181
11182 if (depth > 40) {
11183 return(XML_ERR_ENTITY_LOOP);
11184 }
11185
11186
11187
11188 if (list != NULL)
11189 *list = NULL;
11190 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011191 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011192 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011193 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011194
11195
11196 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011197 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011198 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011199 if (oldctxt != NULL) {
11200 ctxt->_private = oldctxt->_private;
11201 ctxt->loadsubset = oldctxt->loadsubset;
11202 ctxt->validate = oldctxt->validate;
11203 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011204 ctxt->record_info = oldctxt->record_info;
11205 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11206 ctxt->node_seq.length = oldctxt->node_seq.length;
11207 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011208 } else {
11209 /*
11210 * Doing validity checking on chunk without context
11211 * doesn't make sense
11212 */
11213 ctxt->_private = NULL;
11214 ctxt->validate = 0;
11215 ctxt->external = 2;
11216 ctxt->loadsubset = 0;
11217 }
Owen Taylor3473f882001-02-23 17:55:21 +000011218 if (sax != NULL) {
11219 oldsax = ctxt->sax;
11220 ctxt->sax = sax;
11221 if (user_data != NULL)
11222 ctxt->userData = user_data;
11223 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011224 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011225 newDoc = xmlNewDoc(BAD_CAST "1.0");
11226 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011227 ctxt->node_seq.maximum = 0;
11228 ctxt->node_seq.length = 0;
11229 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011230 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011231 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011232 }
11233 if (doc != NULL) {
11234 newDoc->intSubset = doc->intSubset;
11235 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011236 newDoc->dict = doc->dict;
11237 } else if (oldctxt != NULL) {
11238 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011239 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011240 xmlDictReference(newDoc->dict);
11241
Owen Taylor3473f882001-02-23 17:55:21 +000011242 if (doc->URL != NULL) {
11243 newDoc->URL = xmlStrdup(doc->URL);
11244 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011245 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11246 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011247 if (sax != NULL)
11248 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011249 ctxt->node_seq.maximum = 0;
11250 ctxt->node_seq.length = 0;
11251 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011252 xmlFreeParserCtxt(ctxt);
11253 newDoc->intSubset = NULL;
11254 newDoc->extSubset = NULL;
11255 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011256 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011257 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011258 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011259 nodePush(ctxt, newDoc->children);
11260 if (doc == NULL) {
11261 ctxt->myDoc = newDoc;
11262 } else {
11263 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011264 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011265 }
11266
Daniel Veillard87a764e2001-06-20 17:41:10 +000011267 /*
11268 * Get the 4 first bytes and decode the charset
11269 * if enc != XML_CHAR_ENCODING_NONE
11270 * plug some encoding conversion routines.
11271 */
11272 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011273 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11274 start[0] = RAW;
11275 start[1] = NXT(1);
11276 start[2] = NXT(2);
11277 start[3] = NXT(3);
11278 enc = xmlDetectCharEncoding(start, 4);
11279 if (enc != XML_CHAR_ENCODING_NONE) {
11280 xmlSwitchEncoding(ctxt, enc);
11281 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011282 }
11283
Owen Taylor3473f882001-02-23 17:55:21 +000011284 /*
11285 * Parse a possible text declaration first
11286 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011287 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011288 xmlParseTextDecl(ctxt);
11289 }
11290
Owen Taylor3473f882001-02-23 17:55:21 +000011291 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011292 ctxt->depth = depth;
11293
11294 xmlParseContent(ctxt);
11295
Daniel Veillard561b7f82002-03-20 21:55:57 +000011296 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011297 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011298 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011299 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011300 }
11301 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011302 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011303 }
11304
11305 if (!ctxt->wellFormed) {
11306 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011307 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011308 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011309 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011310 } else {
11311 if (list != NULL) {
11312 xmlNodePtr cur;
11313
11314 /*
11315 * Return the newly created nodeset after unlinking it from
11316 * they pseudo parent.
11317 */
11318 cur = newDoc->children->children;
11319 *list = cur;
11320 while (cur != NULL) {
11321 cur->parent = NULL;
11322 cur = cur->next;
11323 }
11324 newDoc->children->children = NULL;
11325 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011326 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011327 }
11328 if (sax != NULL)
11329 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011330 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11331 oldctxt->node_seq.length = ctxt->node_seq.length;
11332 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011333 ctxt->node_seq.maximum = 0;
11334 ctxt->node_seq.length = 0;
11335 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011336 xmlFreeParserCtxt(ctxt);
11337 newDoc->intSubset = NULL;
11338 newDoc->extSubset = NULL;
11339 xmlFreeDoc(newDoc);
11340
11341 return(ret);
11342}
11343
Daniel Veillard81273902003-09-30 00:43:48 +000011344#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011345/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011346 * xmlParseExternalEntity:
11347 * @doc: the document the chunk pertains to
11348 * @sax: the SAX handler bloc (possibly NULL)
11349 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11350 * @depth: Used for loop detection, use 0
11351 * @URL: the URL for the entity to load
11352 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011353 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011354 *
11355 * Parse an external general entity
11356 * An external general parsed entity is well-formed if it matches the
11357 * production labeled extParsedEnt.
11358 *
11359 * [78] extParsedEnt ::= TextDecl? content
11360 *
11361 * Returns 0 if the entity is well formed, -1 in case of args problem and
11362 * the parser error code otherwise
11363 */
11364
11365int
11366xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011367 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011368 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011369 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011370}
11371
11372/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011373 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011374 * @doc: the document the chunk pertains to
11375 * @sax: the SAX handler bloc (possibly NULL)
11376 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11377 * @depth: Used for loop detection, use 0
11378 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011379 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011380 *
11381 * Parse a well-balanced chunk of an XML document
11382 * called by the parser
11383 * The allowed sequence for the Well Balanced Chunk is the one defined by
11384 * the content production in the XML grammar:
11385 *
11386 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11387 *
11388 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11389 * the parser error code otherwise
11390 */
11391
11392int
11393xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011394 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011395 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11396 depth, string, lst, 0 );
11397}
Daniel Veillard81273902003-09-30 00:43:48 +000011398#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011399
11400/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011401 * xmlParseBalancedChunkMemoryInternal:
11402 * @oldctxt: the existing parsing context
11403 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11404 * @user_data: the user data field for the parser context
11405 * @lst: the return value for the set of parsed nodes
11406 *
11407 *
11408 * Parse a well-balanced chunk of an XML document
11409 * called by the parser
11410 * The allowed sequence for the Well Balanced Chunk is the one defined by
11411 * the content production in the XML grammar:
11412 *
11413 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11414 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011415 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11416 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011417 *
11418 * In case recover is set to 1, the nodelist will not be empty even if
11419 * the parsed chunk is not well balanced.
11420 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011421static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011422xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11423 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11424 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011425 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011426 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011427 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011428 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011429 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011430 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011431 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011432
11433 if (oldctxt->depth > 40) {
11434 return(XML_ERR_ENTITY_LOOP);
11435 }
11436
11437
11438 if (lst != NULL)
11439 *lst = NULL;
11440 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011441 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011442
11443 size = xmlStrlen(string);
11444
11445 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011446 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011447 if (user_data != NULL)
11448 ctxt->userData = user_data;
11449 else
11450 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011451 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11452 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011453 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11454 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11455 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011456
11457 oldsax = ctxt->sax;
11458 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011459 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011460 ctxt->replaceEntities = oldctxt->replaceEntities;
11461 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011462
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011463 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011464 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011465 newDoc = xmlNewDoc(BAD_CAST "1.0");
11466 if (newDoc == NULL) {
11467 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011468 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011469 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011470 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011471 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011472 newDoc->dict = ctxt->dict;
11473 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011474 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011475 } else {
11476 ctxt->myDoc = oldctxt->myDoc;
11477 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011478 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011479 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011480 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11481 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011482 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011483 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011484 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011485 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011486 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011487 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011488 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011489 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011490 ctxt->myDoc->children = NULL;
11491 ctxt->myDoc->last = NULL;
11492 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011493 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011494 ctxt->instate = XML_PARSER_CONTENT;
11495 ctxt->depth = oldctxt->depth + 1;
11496
Daniel Veillard328f48c2002-11-15 15:24:34 +000011497 ctxt->validate = 0;
11498 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011499 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11500 /*
11501 * ID/IDREF registration will be done in xmlValidateElement below
11502 */
11503 ctxt->loadsubset |= XML_SKIP_IDS;
11504 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011505 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011506 ctxt->attsDefault = oldctxt->attsDefault;
11507 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011508
Daniel Veillard68e9e742002-11-16 15:35:11 +000011509 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011510 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011511 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011512 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011513 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011514 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011515 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011516 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011517 }
11518
11519 if (!ctxt->wellFormed) {
11520 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011521 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011522 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011523 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011524 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011525 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011526 }
11527
William M. Brack7b9154b2003-09-27 19:23:50 +000011528 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011529 xmlNodePtr cur;
11530
11531 /*
11532 * Return the newly created nodeset after unlinking it from
11533 * they pseudo parent.
11534 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011535 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011536 *lst = cur;
11537 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011538#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011539 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11540 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11541 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011542 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11543 oldctxt->myDoc, cur);
11544 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011545#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011546 cur->parent = NULL;
11547 cur = cur->next;
11548 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011549 ctxt->myDoc->children->children = NULL;
11550 }
11551 if (ctxt->myDoc != NULL) {
11552 xmlFreeNode(ctxt->myDoc->children);
11553 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011554 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011555 }
11556
11557 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011558 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011559 ctxt->attsDefault = NULL;
11560 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011561 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011562 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011563 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011564 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011565
11566 return(ret);
11567}
11568
Daniel Veillard29b17482004-08-16 00:39:03 +000011569/**
11570 * xmlParseInNodeContext:
11571 * @node: the context node
11572 * @data: the input string
11573 * @datalen: the input string length in bytes
11574 * @options: a combination of xmlParserOption
11575 * @lst: the return value for the set of parsed nodes
11576 *
11577 * Parse a well-balanced chunk of an XML document
11578 * within the context (DTD, namespaces, etc ...) of the given node.
11579 *
11580 * The allowed sequence for the data is a Well Balanced Chunk defined by
11581 * the content production in the XML grammar:
11582 *
11583 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11584 *
11585 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11586 * error code otherwise
11587 */
11588xmlParserErrors
11589xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11590 int options, xmlNodePtr *lst) {
11591#ifdef SAX2
11592 xmlParserCtxtPtr ctxt;
11593 xmlDocPtr doc = NULL;
11594 xmlNodePtr fake, cur;
11595 int nsnr = 0;
11596
11597 xmlParserErrors ret = XML_ERR_OK;
11598
11599 /*
11600 * check all input parameters, grab the document
11601 */
11602 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11603 return(XML_ERR_INTERNAL_ERROR);
11604 switch (node->type) {
11605 case XML_ELEMENT_NODE:
11606 case XML_ATTRIBUTE_NODE:
11607 case XML_TEXT_NODE:
11608 case XML_CDATA_SECTION_NODE:
11609 case XML_ENTITY_REF_NODE:
11610 case XML_PI_NODE:
11611 case XML_COMMENT_NODE:
11612 case XML_DOCUMENT_NODE:
11613 case XML_HTML_DOCUMENT_NODE:
11614 break;
11615 default:
11616 return(XML_ERR_INTERNAL_ERROR);
11617
11618 }
11619 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11620 (node->type != XML_DOCUMENT_NODE) &&
11621 (node->type != XML_HTML_DOCUMENT_NODE))
11622 node = node->parent;
11623 if (node == NULL)
11624 return(XML_ERR_INTERNAL_ERROR);
11625 if (node->type == XML_ELEMENT_NODE)
11626 doc = node->doc;
11627 else
11628 doc = (xmlDocPtr) node;
11629 if (doc == NULL)
11630 return(XML_ERR_INTERNAL_ERROR);
11631
11632 /*
11633 * allocate a context and set-up everything not related to the
11634 * node position in the tree
11635 */
11636 if (doc->type == XML_DOCUMENT_NODE)
11637 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11638#ifdef LIBXML_HTML_ENABLED
11639 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11640 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11641#endif
11642 else
11643 return(XML_ERR_INTERNAL_ERROR);
11644
11645 if (ctxt == NULL)
11646 return(XML_ERR_NO_MEMORY);
11647 fake = xmlNewComment(NULL);
11648 if (fake == NULL) {
11649 xmlFreeParserCtxt(ctxt);
11650 return(XML_ERR_NO_MEMORY);
11651 }
11652 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011653
11654 /*
11655 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11656 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11657 * we must wait until the last moment to free the original one.
11658 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011659 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011660 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011661 xmlDictFree(ctxt->dict);
11662 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011663 } else
11664 options |= XML_PARSE_NODICT;
11665
11666 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011667 xmlDetectSAX2(ctxt);
11668 ctxt->myDoc = doc;
11669
11670 if (node->type == XML_ELEMENT_NODE) {
11671 nodePush(ctxt, node);
11672 /*
11673 * initialize the SAX2 namespaces stack
11674 */
11675 cur = node;
11676 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11677 xmlNsPtr ns = cur->nsDef;
11678 const xmlChar *iprefix, *ihref;
11679
11680 while (ns != NULL) {
11681 if (ctxt->dict) {
11682 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11683 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11684 } else {
11685 iprefix = ns->prefix;
11686 ihref = ns->href;
11687 }
11688
11689 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11690 nsPush(ctxt, iprefix, ihref);
11691 nsnr++;
11692 }
11693 ns = ns->next;
11694 }
11695 cur = cur->parent;
11696 }
11697 ctxt->instate = XML_PARSER_CONTENT;
11698 }
11699
11700 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11701 /*
11702 * ID/IDREF registration will be done in xmlValidateElement below
11703 */
11704 ctxt->loadsubset |= XML_SKIP_IDS;
11705 }
11706
11707 xmlParseContent(ctxt);
11708 nsPop(ctxt, nsnr);
11709 if ((RAW == '<') && (NXT(1) == '/')) {
11710 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11711 } else if (RAW != 0) {
11712 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11713 }
11714 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11715 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11716 ctxt->wellFormed = 0;
11717 }
11718
11719 if (!ctxt->wellFormed) {
11720 if (ctxt->errNo == 0)
11721 ret = XML_ERR_INTERNAL_ERROR;
11722 else
11723 ret = (xmlParserErrors)ctxt->errNo;
11724 } else {
11725 ret = XML_ERR_OK;
11726 }
11727
11728 /*
11729 * Return the newly created nodeset after unlinking it from
11730 * the pseudo sibling.
11731 */
11732
11733 cur = fake->next;
11734 fake->next = NULL;
11735 node->last = fake;
11736
11737 if (cur != NULL) {
11738 cur->prev = NULL;
11739 }
11740
11741 *lst = cur;
11742
11743 while (cur != NULL) {
11744 cur->parent = NULL;
11745 cur = cur->next;
11746 }
11747
11748 xmlUnlinkNode(fake);
11749 xmlFreeNode(fake);
11750
11751
11752 if (ret != XML_ERR_OK) {
11753 xmlFreeNodeList(*lst);
11754 *lst = NULL;
11755 }
William M. Brackc3f81342004-10-03 01:22:44 +000011756
William M. Brackb7b54de2004-10-06 16:38:01 +000011757 if (doc->dict != NULL)
11758 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011759 xmlFreeParserCtxt(ctxt);
11760
11761 return(ret);
11762#else /* !SAX2 */
11763 return(XML_ERR_INTERNAL_ERROR);
11764#endif
11765}
11766
Daniel Veillard81273902003-09-30 00:43:48 +000011767#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011768/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011769 * xmlParseBalancedChunkMemoryRecover:
11770 * @doc: the document the chunk pertains to
11771 * @sax: the SAX handler bloc (possibly NULL)
11772 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11773 * @depth: Used for loop detection, use 0
11774 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11775 * @lst: the return value for the set of parsed nodes
11776 * @recover: return nodes even if the data is broken (use 0)
11777 *
11778 *
11779 * Parse a well-balanced chunk of an XML document
11780 * called by the parser
11781 * The allowed sequence for the Well Balanced Chunk is the one defined by
11782 * the content production in the XML grammar:
11783 *
11784 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11785 *
11786 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11787 * the parser error code otherwise
11788 *
11789 * In case recover is set to 1, the nodelist will not be empty even if
11790 * the parsed chunk is not well balanced.
11791 */
11792int
11793xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11794 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11795 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011796 xmlParserCtxtPtr ctxt;
11797 xmlDocPtr newDoc;
11798 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011799 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011800 int size;
11801 int ret = 0;
11802
11803 if (depth > 40) {
11804 return(XML_ERR_ENTITY_LOOP);
11805 }
11806
11807
Daniel Veillardcda96922001-08-21 10:56:31 +000011808 if (lst != NULL)
11809 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011810 if (string == NULL)
11811 return(-1);
11812
11813 size = xmlStrlen(string);
11814
11815 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11816 if (ctxt == NULL) return(-1);
11817 ctxt->userData = ctxt;
11818 if (sax != NULL) {
11819 oldsax = ctxt->sax;
11820 ctxt->sax = sax;
11821 if (user_data != NULL)
11822 ctxt->userData = user_data;
11823 }
11824 newDoc = xmlNewDoc(BAD_CAST "1.0");
11825 if (newDoc == NULL) {
11826 xmlFreeParserCtxt(ctxt);
11827 return(-1);
11828 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011829 if ((doc != NULL) && (doc->dict != NULL)) {
11830 xmlDictFree(ctxt->dict);
11831 ctxt->dict = doc->dict;
11832 xmlDictReference(ctxt->dict);
11833 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11834 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11835 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11836 ctxt->dictNames = 1;
11837 } else {
11838 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11839 }
Owen Taylor3473f882001-02-23 17:55:21 +000011840 if (doc != NULL) {
11841 newDoc->intSubset = doc->intSubset;
11842 newDoc->extSubset = doc->extSubset;
11843 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011844 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11845 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011846 if (sax != NULL)
11847 ctxt->sax = oldsax;
11848 xmlFreeParserCtxt(ctxt);
11849 newDoc->intSubset = NULL;
11850 newDoc->extSubset = NULL;
11851 xmlFreeDoc(newDoc);
11852 return(-1);
11853 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011854 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11855 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011856 if (doc == NULL) {
11857 ctxt->myDoc = newDoc;
11858 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011859 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011860 newDoc->children->doc = doc;
11861 }
11862 ctxt->instate = XML_PARSER_CONTENT;
11863 ctxt->depth = depth;
11864
11865 /*
11866 * Doing validity checking on chunk doesn't make sense
11867 */
11868 ctxt->validate = 0;
11869 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011870 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011871
Daniel Veillardb39bc392002-10-26 19:29:51 +000011872 if ( doc != NULL ){
11873 content = doc->children;
11874 doc->children = NULL;
11875 xmlParseContent(ctxt);
11876 doc->children = content;
11877 }
11878 else {
11879 xmlParseContent(ctxt);
11880 }
Owen Taylor3473f882001-02-23 17:55:21 +000011881 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011882 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011883 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011884 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011885 }
11886 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011887 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011888 }
11889
11890 if (!ctxt->wellFormed) {
11891 if (ctxt->errNo == 0)
11892 ret = 1;
11893 else
11894 ret = ctxt->errNo;
11895 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011896 ret = 0;
11897 }
11898
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011899 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11900 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011901
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011902 /*
11903 * Return the newly created nodeset after unlinking it from
11904 * they pseudo parent.
11905 */
11906 cur = newDoc->children->children;
11907 *lst = cur;
11908 while (cur != NULL) {
11909 xmlSetTreeDoc(cur, doc);
11910 cur->parent = NULL;
11911 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011912 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011913 newDoc->children->children = NULL;
11914 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011915
Owen Taylor3473f882001-02-23 17:55:21 +000011916 if (sax != NULL)
11917 ctxt->sax = oldsax;
11918 xmlFreeParserCtxt(ctxt);
11919 newDoc->intSubset = NULL;
11920 newDoc->extSubset = NULL;
11921 xmlFreeDoc(newDoc);
11922
11923 return(ret);
11924}
11925
11926/**
11927 * xmlSAXParseEntity:
11928 * @sax: the SAX handler block
11929 * @filename: the filename
11930 *
11931 * parse an XML external entity out of context and build a tree.
11932 * It use the given SAX function block to handle the parsing callback.
11933 * If sax is NULL, fallback to the default DOM tree building routines.
11934 *
11935 * [78] extParsedEnt ::= TextDecl? content
11936 *
11937 * This correspond to a "Well Balanced" chunk
11938 *
11939 * Returns the resulting document tree
11940 */
11941
11942xmlDocPtr
11943xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11944 xmlDocPtr ret;
11945 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011946
11947 ctxt = xmlCreateFileParserCtxt(filename);
11948 if (ctxt == NULL) {
11949 return(NULL);
11950 }
11951 if (sax != NULL) {
11952 if (ctxt->sax != NULL)
11953 xmlFree(ctxt->sax);
11954 ctxt->sax = sax;
11955 ctxt->userData = NULL;
11956 }
11957
Owen Taylor3473f882001-02-23 17:55:21 +000011958 xmlParseExtParsedEnt(ctxt);
11959
11960 if (ctxt->wellFormed)
11961 ret = ctxt->myDoc;
11962 else {
11963 ret = NULL;
11964 xmlFreeDoc(ctxt->myDoc);
11965 ctxt->myDoc = NULL;
11966 }
11967 if (sax != NULL)
11968 ctxt->sax = NULL;
11969 xmlFreeParserCtxt(ctxt);
11970
11971 return(ret);
11972}
11973
11974/**
11975 * xmlParseEntity:
11976 * @filename: the filename
11977 *
11978 * parse an XML external entity out of context and build a tree.
11979 *
11980 * [78] extParsedEnt ::= TextDecl? content
11981 *
11982 * This correspond to a "Well Balanced" chunk
11983 *
11984 * Returns the resulting document tree
11985 */
11986
11987xmlDocPtr
11988xmlParseEntity(const char *filename) {
11989 return(xmlSAXParseEntity(NULL, filename));
11990}
Daniel Veillard81273902003-09-30 00:43:48 +000011991#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011992
11993/**
11994 * xmlCreateEntityParserCtxt:
11995 * @URL: the entity URL
11996 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011997 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011998 *
11999 * Create a parser context for an external entity
12000 * Automatic support for ZLIB/Compress compressed document is provided
12001 * by default if found at compile-time.
12002 *
12003 * Returns the new parser context or NULL
12004 */
12005xmlParserCtxtPtr
12006xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12007 const xmlChar *base) {
12008 xmlParserCtxtPtr ctxt;
12009 xmlParserInputPtr inputStream;
12010 char *directory = NULL;
12011 xmlChar *uri;
12012
12013 ctxt = xmlNewParserCtxt();
12014 if (ctxt == NULL) {
12015 return(NULL);
12016 }
12017
12018 uri = xmlBuildURI(URL, base);
12019
12020 if (uri == NULL) {
12021 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12022 if (inputStream == NULL) {
12023 xmlFreeParserCtxt(ctxt);
12024 return(NULL);
12025 }
12026
12027 inputPush(ctxt, inputStream);
12028
12029 if ((ctxt->directory == NULL) && (directory == NULL))
12030 directory = xmlParserGetDirectory((char *)URL);
12031 if ((ctxt->directory == NULL) && (directory != NULL))
12032 ctxt->directory = directory;
12033 } else {
12034 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12035 if (inputStream == NULL) {
12036 xmlFree(uri);
12037 xmlFreeParserCtxt(ctxt);
12038 return(NULL);
12039 }
12040
12041 inputPush(ctxt, inputStream);
12042
12043 if ((ctxt->directory == NULL) && (directory == NULL))
12044 directory = xmlParserGetDirectory((char *)uri);
12045 if ((ctxt->directory == NULL) && (directory != NULL))
12046 ctxt->directory = directory;
12047 xmlFree(uri);
12048 }
Owen Taylor3473f882001-02-23 17:55:21 +000012049 return(ctxt);
12050}
12051
12052/************************************************************************
12053 * *
12054 * Front ends when parsing from a file *
12055 * *
12056 ************************************************************************/
12057
12058/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012059 * xmlCreateURLParserCtxt:
12060 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012061 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012062 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012063 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012064 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012065 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012066 *
12067 * Returns the new parser context or NULL
12068 */
12069xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012070xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012071{
12072 xmlParserCtxtPtr ctxt;
12073 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012074 char *directory = NULL;
12075
Owen Taylor3473f882001-02-23 17:55:21 +000012076 ctxt = xmlNewParserCtxt();
12077 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012078 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012079 return(NULL);
12080 }
12081
Daniel Veillarddf292f72005-01-16 19:00:15 +000012082 if (options)
12083 xmlCtxtUseOptions(ctxt, options);
12084 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012085
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012086 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012087 if (inputStream == NULL) {
12088 xmlFreeParserCtxt(ctxt);
12089 return(NULL);
12090 }
12091
Owen Taylor3473f882001-02-23 17:55:21 +000012092 inputPush(ctxt, inputStream);
12093 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012094 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012095 if ((ctxt->directory == NULL) && (directory != NULL))
12096 ctxt->directory = directory;
12097
12098 return(ctxt);
12099}
12100
Daniel Veillard61b93382003-11-03 14:28:31 +000012101/**
12102 * xmlCreateFileParserCtxt:
12103 * @filename: the filename
12104 *
12105 * Create a parser context for a file content.
12106 * Automatic support for ZLIB/Compress compressed document is provided
12107 * by default if found at compile-time.
12108 *
12109 * Returns the new parser context or NULL
12110 */
12111xmlParserCtxtPtr
12112xmlCreateFileParserCtxt(const char *filename)
12113{
12114 return(xmlCreateURLParserCtxt(filename, 0));
12115}
12116
Daniel Veillard81273902003-09-30 00:43:48 +000012117#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012118/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012119 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012120 * @sax: the SAX handler block
12121 * @filename: the filename
12122 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12123 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012124 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012125 *
12126 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12127 * compressed document is provided by default if found at compile-time.
12128 * It use the given SAX function block to handle the parsing callback.
12129 * If sax is NULL, fallback to the default DOM tree building routines.
12130 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012131 * User data (void *) is stored within the parser context in the
12132 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012133 *
Owen Taylor3473f882001-02-23 17:55:21 +000012134 * Returns the resulting document tree
12135 */
12136
12137xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012138xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12139 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012140 xmlDocPtr ret;
12141 xmlParserCtxtPtr ctxt;
12142 char *directory = NULL;
12143
Daniel Veillard635ef722001-10-29 11:48:19 +000012144 xmlInitParser();
12145
Owen Taylor3473f882001-02-23 17:55:21 +000012146 ctxt = xmlCreateFileParserCtxt(filename);
12147 if (ctxt == NULL) {
12148 return(NULL);
12149 }
12150 if (sax != NULL) {
12151 if (ctxt->sax != NULL)
12152 xmlFree(ctxt->sax);
12153 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012154 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012155 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012156 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012157 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012158 }
Owen Taylor3473f882001-02-23 17:55:21 +000012159
12160 if ((ctxt->directory == NULL) && (directory == NULL))
12161 directory = xmlParserGetDirectory(filename);
12162 if ((ctxt->directory == NULL) && (directory != NULL))
12163 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12164
Daniel Veillarddad3f682002-11-17 16:47:27 +000012165 ctxt->recovery = recovery;
12166
Owen Taylor3473f882001-02-23 17:55:21 +000012167 xmlParseDocument(ctxt);
12168
William M. Brackc07329e2003-09-08 01:57:30 +000012169 if ((ctxt->wellFormed) || recovery) {
12170 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012171 if (ret != NULL) {
12172 if (ctxt->input->buf->compressed > 0)
12173 ret->compression = 9;
12174 else
12175 ret->compression = ctxt->input->buf->compressed;
12176 }
William M. Brackc07329e2003-09-08 01:57:30 +000012177 }
Owen Taylor3473f882001-02-23 17:55:21 +000012178 else {
12179 ret = NULL;
12180 xmlFreeDoc(ctxt->myDoc);
12181 ctxt->myDoc = NULL;
12182 }
12183 if (sax != NULL)
12184 ctxt->sax = NULL;
12185 xmlFreeParserCtxt(ctxt);
12186
12187 return(ret);
12188}
12189
12190/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012191 * xmlSAXParseFile:
12192 * @sax: the SAX handler block
12193 * @filename: the filename
12194 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12195 * documents
12196 *
12197 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12198 * compressed document is provided by default if found at compile-time.
12199 * It use the given SAX function block to handle the parsing callback.
12200 * If sax is NULL, fallback to the default DOM tree building routines.
12201 *
12202 * Returns the resulting document tree
12203 */
12204
12205xmlDocPtr
12206xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12207 int recovery) {
12208 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12209}
12210
12211/**
Owen Taylor3473f882001-02-23 17:55:21 +000012212 * xmlRecoverDoc:
12213 * @cur: a pointer to an array of xmlChar
12214 *
12215 * parse an XML in-memory document and build a tree.
12216 * In the case the document is not Well Formed, a tree is built anyway
12217 *
12218 * Returns the resulting document tree
12219 */
12220
12221xmlDocPtr
12222xmlRecoverDoc(xmlChar *cur) {
12223 return(xmlSAXParseDoc(NULL, cur, 1));
12224}
12225
12226/**
12227 * xmlParseFile:
12228 * @filename: the filename
12229 *
12230 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12231 * compressed document is provided by default if found at compile-time.
12232 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012233 * Returns the resulting document tree if the file was wellformed,
12234 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012235 */
12236
12237xmlDocPtr
12238xmlParseFile(const char *filename) {
12239 return(xmlSAXParseFile(NULL, filename, 0));
12240}
12241
12242/**
12243 * xmlRecoverFile:
12244 * @filename: the filename
12245 *
12246 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12247 * compressed document is provided by default if found at compile-time.
12248 * In the case the document is not Well Formed, a tree is built anyway
12249 *
12250 * Returns the resulting document tree
12251 */
12252
12253xmlDocPtr
12254xmlRecoverFile(const char *filename) {
12255 return(xmlSAXParseFile(NULL, filename, 1));
12256}
12257
12258
12259/**
12260 * xmlSetupParserForBuffer:
12261 * @ctxt: an XML parser context
12262 * @buffer: a xmlChar * buffer
12263 * @filename: a file name
12264 *
12265 * Setup the parser context to parse a new buffer; Clears any prior
12266 * contents from the parser context. The buffer parameter must not be
12267 * NULL, but the filename parameter can be
12268 */
12269void
12270xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12271 const char* filename)
12272{
12273 xmlParserInputPtr input;
12274
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012275 if ((ctxt == NULL) || (buffer == NULL))
12276 return;
12277
Owen Taylor3473f882001-02-23 17:55:21 +000012278 input = xmlNewInputStream(ctxt);
12279 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012280 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012281 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012282 return;
12283 }
12284
12285 xmlClearParserCtxt(ctxt);
12286 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012287 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012288 input->base = buffer;
12289 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012290 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012291 inputPush(ctxt, input);
12292}
12293
12294/**
12295 * xmlSAXUserParseFile:
12296 * @sax: a SAX handler
12297 * @user_data: The user data returned on SAX callbacks
12298 * @filename: a file name
12299 *
12300 * parse an XML file and call the given SAX handler routines.
12301 * Automatic support for ZLIB/Compress compressed document is provided
12302 *
12303 * Returns 0 in case of success or a error number otherwise
12304 */
12305int
12306xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12307 const char *filename) {
12308 int ret = 0;
12309 xmlParserCtxtPtr ctxt;
12310
12311 ctxt = xmlCreateFileParserCtxt(filename);
12312 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012313#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012314 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012315#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012316 xmlFree(ctxt->sax);
12317 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012318 xmlDetectSAX2(ctxt);
12319
Owen Taylor3473f882001-02-23 17:55:21 +000012320 if (user_data != NULL)
12321 ctxt->userData = user_data;
12322
12323 xmlParseDocument(ctxt);
12324
12325 if (ctxt->wellFormed)
12326 ret = 0;
12327 else {
12328 if (ctxt->errNo != 0)
12329 ret = ctxt->errNo;
12330 else
12331 ret = -1;
12332 }
12333 if (sax != NULL)
12334 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012335 if (ctxt->myDoc != NULL) {
12336 xmlFreeDoc(ctxt->myDoc);
12337 ctxt->myDoc = NULL;
12338 }
Owen Taylor3473f882001-02-23 17:55:21 +000012339 xmlFreeParserCtxt(ctxt);
12340
12341 return ret;
12342}
Daniel Veillard81273902003-09-30 00:43:48 +000012343#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012344
12345/************************************************************************
12346 * *
12347 * Front ends when parsing from memory *
12348 * *
12349 ************************************************************************/
12350
12351/**
12352 * xmlCreateMemoryParserCtxt:
12353 * @buffer: a pointer to a char array
12354 * @size: the size of the array
12355 *
12356 * Create a parser context for an XML in-memory document.
12357 *
12358 * Returns the new parser context or NULL
12359 */
12360xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012361xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012362 xmlParserCtxtPtr ctxt;
12363 xmlParserInputPtr input;
12364 xmlParserInputBufferPtr buf;
12365
12366 if (buffer == NULL)
12367 return(NULL);
12368 if (size <= 0)
12369 return(NULL);
12370
12371 ctxt = xmlNewParserCtxt();
12372 if (ctxt == NULL)
12373 return(NULL);
12374
Daniel Veillard53350552003-09-18 13:35:51 +000012375 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012376 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012377 if (buf == NULL) {
12378 xmlFreeParserCtxt(ctxt);
12379 return(NULL);
12380 }
Owen Taylor3473f882001-02-23 17:55:21 +000012381
12382 input = xmlNewInputStream(ctxt);
12383 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012384 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012385 xmlFreeParserCtxt(ctxt);
12386 return(NULL);
12387 }
12388
12389 input->filename = NULL;
12390 input->buf = buf;
12391 input->base = input->buf->buffer->content;
12392 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012393 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012394
12395 inputPush(ctxt, input);
12396 return(ctxt);
12397}
12398
Daniel Veillard81273902003-09-30 00:43:48 +000012399#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012400/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012401 * xmlSAXParseMemoryWithData:
12402 * @sax: the SAX handler block
12403 * @buffer: an pointer to a char array
12404 * @size: the size of the array
12405 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12406 * documents
12407 * @data: the userdata
12408 *
12409 * parse an XML in-memory block and use the given SAX function block
12410 * to handle the parsing callback. If sax is NULL, fallback to the default
12411 * DOM tree building routines.
12412 *
12413 * User data (void *) is stored within the parser context in the
12414 * context's _private member, so it is available nearly everywhere in libxml
12415 *
12416 * Returns the resulting document tree
12417 */
12418
12419xmlDocPtr
12420xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12421 int size, int recovery, void *data) {
12422 xmlDocPtr ret;
12423 xmlParserCtxtPtr ctxt;
12424
12425 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12426 if (ctxt == NULL) return(NULL);
12427 if (sax != NULL) {
12428 if (ctxt->sax != NULL)
12429 xmlFree(ctxt->sax);
12430 ctxt->sax = sax;
12431 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012432 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012433 if (data!=NULL) {
12434 ctxt->_private=data;
12435 }
12436
Daniel Veillardadba5f12003-04-04 16:09:01 +000012437 ctxt->recovery = recovery;
12438
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012439 xmlParseDocument(ctxt);
12440
12441 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12442 else {
12443 ret = NULL;
12444 xmlFreeDoc(ctxt->myDoc);
12445 ctxt->myDoc = NULL;
12446 }
12447 if (sax != NULL)
12448 ctxt->sax = NULL;
12449 xmlFreeParserCtxt(ctxt);
12450
12451 return(ret);
12452}
12453
12454/**
Owen Taylor3473f882001-02-23 17:55:21 +000012455 * xmlSAXParseMemory:
12456 * @sax: the SAX handler block
12457 * @buffer: an pointer to a char array
12458 * @size: the size of the array
12459 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12460 * documents
12461 *
12462 * parse an XML in-memory block and use the given SAX function block
12463 * to handle the parsing callback. If sax is NULL, fallback to the default
12464 * DOM tree building routines.
12465 *
12466 * Returns the resulting document tree
12467 */
12468xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012469xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12470 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012471 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012472}
12473
12474/**
12475 * xmlParseMemory:
12476 * @buffer: an pointer to a char array
12477 * @size: the size of the array
12478 *
12479 * parse an XML in-memory block and build a tree.
12480 *
12481 * Returns the resulting document tree
12482 */
12483
Daniel Veillard50822cb2001-07-26 20:05:51 +000012484xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012485 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12486}
12487
12488/**
12489 * xmlRecoverMemory:
12490 * @buffer: an pointer to a char array
12491 * @size: the size of the array
12492 *
12493 * parse an XML in-memory block and build a tree.
12494 * In the case the document is not Well Formed, a tree is built anyway
12495 *
12496 * Returns the resulting document tree
12497 */
12498
Daniel Veillard50822cb2001-07-26 20:05:51 +000012499xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012500 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12501}
12502
12503/**
12504 * xmlSAXUserParseMemory:
12505 * @sax: a SAX handler
12506 * @user_data: The user data returned on SAX callbacks
12507 * @buffer: an in-memory XML document input
12508 * @size: the length of the XML document in bytes
12509 *
12510 * A better SAX parsing routine.
12511 * parse an XML in-memory buffer and call the given SAX handler routines.
12512 *
12513 * Returns 0 in case of success or a error number otherwise
12514 */
12515int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012516 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012517 int ret = 0;
12518 xmlParserCtxtPtr ctxt;
12519 xmlSAXHandlerPtr oldsax = NULL;
12520
Daniel Veillard9e923512002-08-14 08:48:52 +000012521 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012522 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12523 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012524 oldsax = ctxt->sax;
12525 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012526 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012527 if (user_data != NULL)
12528 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012529
12530 xmlParseDocument(ctxt);
12531
12532 if (ctxt->wellFormed)
12533 ret = 0;
12534 else {
12535 if (ctxt->errNo != 0)
12536 ret = ctxt->errNo;
12537 else
12538 ret = -1;
12539 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012540 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012541 if (ctxt->myDoc != NULL) {
12542 xmlFreeDoc(ctxt->myDoc);
12543 ctxt->myDoc = NULL;
12544 }
Owen Taylor3473f882001-02-23 17:55:21 +000012545 xmlFreeParserCtxt(ctxt);
12546
12547 return ret;
12548}
Daniel Veillard81273902003-09-30 00:43:48 +000012549#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012550
12551/**
12552 * xmlCreateDocParserCtxt:
12553 * @cur: a pointer to an array of xmlChar
12554 *
12555 * Creates a parser context for an XML in-memory document.
12556 *
12557 * Returns the new parser context or NULL
12558 */
12559xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012560xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012561 int len;
12562
12563 if (cur == NULL)
12564 return(NULL);
12565 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012566 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012567}
12568
Daniel Veillard81273902003-09-30 00:43:48 +000012569#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012570/**
12571 * xmlSAXParseDoc:
12572 * @sax: the SAX handler block
12573 * @cur: a pointer to an array of xmlChar
12574 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12575 * documents
12576 *
12577 * parse an XML in-memory document and build a tree.
12578 * It use the given SAX function block to handle the parsing callback.
12579 * If sax is NULL, fallback to the default DOM tree building routines.
12580 *
12581 * Returns the resulting document tree
12582 */
12583
12584xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012585xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012586 xmlDocPtr ret;
12587 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012588 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012589
Daniel Veillard38936062004-11-04 17:45:11 +000012590 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012591
12592
12593 ctxt = xmlCreateDocParserCtxt(cur);
12594 if (ctxt == NULL) return(NULL);
12595 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012596 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012597 ctxt->sax = sax;
12598 ctxt->userData = NULL;
12599 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012600 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012601
12602 xmlParseDocument(ctxt);
12603 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12604 else {
12605 ret = NULL;
12606 xmlFreeDoc(ctxt->myDoc);
12607 ctxt->myDoc = NULL;
12608 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012609 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012610 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012611 xmlFreeParserCtxt(ctxt);
12612
12613 return(ret);
12614}
12615
12616/**
12617 * xmlParseDoc:
12618 * @cur: a pointer to an array of xmlChar
12619 *
12620 * parse an XML in-memory document and build a tree.
12621 *
12622 * Returns the resulting document tree
12623 */
12624
12625xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012626xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012627 return(xmlSAXParseDoc(NULL, cur, 0));
12628}
Daniel Veillard81273902003-09-30 00:43:48 +000012629#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012630
Daniel Veillard81273902003-09-30 00:43:48 +000012631#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012632/************************************************************************
12633 * *
12634 * Specific function to keep track of entities references *
12635 * and used by the XSLT debugger *
12636 * *
12637 ************************************************************************/
12638
12639static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12640
12641/**
12642 * xmlAddEntityReference:
12643 * @ent : A valid entity
12644 * @firstNode : A valid first node for children of entity
12645 * @lastNode : A valid last node of children entity
12646 *
12647 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12648 */
12649static void
12650xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12651 xmlNodePtr lastNode)
12652{
12653 if (xmlEntityRefFunc != NULL) {
12654 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12655 }
12656}
12657
12658
12659/**
12660 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012661 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012662 *
12663 * Set the function to call call back when a xml reference has been made
12664 */
12665void
12666xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12667{
12668 xmlEntityRefFunc = func;
12669}
Daniel Veillard81273902003-09-30 00:43:48 +000012670#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012671
12672/************************************************************************
12673 * *
12674 * Miscellaneous *
12675 * *
12676 ************************************************************************/
12677
12678#ifdef LIBXML_XPATH_ENABLED
12679#include <libxml/xpath.h>
12680#endif
12681
Daniel Veillardffa3c742005-07-21 13:24:09 +000012682extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012683static int xmlParserInitialized = 0;
12684
12685/**
12686 * xmlInitParser:
12687 *
12688 * Initialization function for the XML parser.
12689 * This is not reentrant. Call once before processing in case of
12690 * use in multithreaded programs.
12691 */
12692
12693void
12694xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012695 if (xmlParserInitialized != 0)
12696 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012697
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012698 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12699 (xmlGenericError == NULL))
12700 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012701 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012702 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012703 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012704 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012705 xmlDefaultSAXHandlerInit();
12706 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012707#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012708 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012709#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012710#ifdef LIBXML_HTML_ENABLED
12711 htmlInitAutoClose();
12712 htmlDefaultSAXHandlerInit();
12713#endif
12714#ifdef LIBXML_XPATH_ENABLED
12715 xmlXPathInit();
12716#endif
12717 xmlParserInitialized = 1;
12718}
12719
12720/**
12721 * xmlCleanupParser:
12722 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012723 * Cleanup function for the XML library. It tries to reclaim all
12724 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012725 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012726 * function should not prevent reusing the library but one should
12727 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012728 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012729 */
12730
12731void
12732xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012733 if (!xmlParserInitialized)
12734 return;
12735
Owen Taylor3473f882001-02-23 17:55:21 +000012736 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012737#ifdef LIBXML_CATALOG_ENABLED
12738 xmlCatalogCleanup();
12739#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012740 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012741 xmlCleanupInputCallbacks();
12742#ifdef LIBXML_OUTPUT_ENABLED
12743 xmlCleanupOutputCallbacks();
12744#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012745#ifdef LIBXML_SCHEMAS_ENABLED
12746 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012747 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012748#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012749 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012750 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012751 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012752 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012753 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012754}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012755
12756/************************************************************************
12757 * *
12758 * New set (2.6.0) of simpler and more flexible APIs *
12759 * *
12760 ************************************************************************/
12761
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored, and a string is always
 * freed when "dict" is NULL. The expansion is a complete if statement
 * that already ends with a semicolon.
 */
#define DICT_FREE(str)						\
	if (((str) != NULL) &&					\
	    ((dict == NULL) ||					\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
12773
12774/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012775 * xmlCtxtReset:
12776 * @ctxt: an XML parser context
12777 *
12778 * Reset a parser context
12779 */
12780void
12781xmlCtxtReset(xmlParserCtxtPtr ctxt)
12782{
12783 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012784 xmlDictPtr dict;
12785
12786 if (ctxt == NULL)
12787 return;
12788
12789 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012790
12791 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12792 xmlFreeInputStream(input);
12793 }
12794 ctxt->inputNr = 0;
12795 ctxt->input = NULL;
12796
12797 ctxt->spaceNr = 0;
12798 ctxt->spaceTab[0] = -1;
12799 ctxt->space = &ctxt->spaceTab[0];
12800
12801
12802 ctxt->nodeNr = 0;
12803 ctxt->node = NULL;
12804
12805 ctxt->nameNr = 0;
12806 ctxt->name = NULL;
12807
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012808 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012809 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012810 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012811 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012812 DICT_FREE(ctxt->directory);
12813 ctxt->directory = NULL;
12814 DICT_FREE(ctxt->extSubURI);
12815 ctxt->extSubURI = NULL;
12816 DICT_FREE(ctxt->extSubSystem);
12817 ctxt->extSubSystem = NULL;
12818 if (ctxt->myDoc != NULL)
12819 xmlFreeDoc(ctxt->myDoc);
12820 ctxt->myDoc = NULL;
12821
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012822 ctxt->standalone = -1;
12823 ctxt->hasExternalSubset = 0;
12824 ctxt->hasPErefs = 0;
12825 ctxt->html = 0;
12826 ctxt->external = 0;
12827 ctxt->instate = XML_PARSER_START;
12828 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012829
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012830 ctxt->wellFormed = 1;
12831 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012832 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012833 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012834#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012835 ctxt->vctxt.userData = ctxt;
12836 ctxt->vctxt.error = xmlParserValidityError;
12837 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012838#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012839 ctxt->record_info = 0;
12840 ctxt->nbChars = 0;
12841 ctxt->checkIndex = 0;
12842 ctxt->inSubset = 0;
12843 ctxt->errNo = XML_ERR_OK;
12844 ctxt->depth = 0;
12845 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12846 ctxt->catalogs = NULL;
12847 xmlInitNodeInfoSeq(&ctxt->node_seq);
12848
12849 if (ctxt->attsDefault != NULL) {
12850 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12851 ctxt->attsDefault = NULL;
12852 }
12853 if (ctxt->attsSpecial != NULL) {
12854 xmlHashFree(ctxt->attsSpecial, NULL);
12855 ctxt->attsSpecial = NULL;
12856 }
12857
Daniel Veillard4432df22003-09-28 18:58:27 +000012858#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012859 if (ctxt->catalogs != NULL)
12860 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012861#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012862 if (ctxt->lastError.code != XML_ERR_OK)
12863 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012864}
12865
12866/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012867 * xmlCtxtResetPush:
12868 * @ctxt: an XML parser context
12869 * @chunk: a pointer to an array of chars
12870 * @size: number of chars in the array
12871 * @filename: an optional file name or URI
12872 * @encoding: the document encoding, or NULL
12873 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012874 * Reset a push parser context
12875 *
12876 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012877 */
12878int
12879xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12880 int size, const char *filename, const char *encoding)
12881{
12882 xmlParserInputPtr inputStream;
12883 xmlParserInputBufferPtr buf;
12884 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12885
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012886 if (ctxt == NULL)
12887 return(1);
12888
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012889 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12890 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12891
12892 buf = xmlAllocParserInputBuffer(enc);
12893 if (buf == NULL)
12894 return(1);
12895
12896 if (ctxt == NULL) {
12897 xmlFreeParserInputBuffer(buf);
12898 return(1);
12899 }
12900
12901 xmlCtxtReset(ctxt);
12902
12903 if (ctxt->pushTab == NULL) {
12904 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12905 sizeof(xmlChar *));
12906 if (ctxt->pushTab == NULL) {
12907 xmlErrMemory(ctxt, NULL);
12908 xmlFreeParserInputBuffer(buf);
12909 return(1);
12910 }
12911 }
12912
12913 if (filename == NULL) {
12914 ctxt->directory = NULL;
12915 } else {
12916 ctxt->directory = xmlParserGetDirectory(filename);
12917 }
12918
12919 inputStream = xmlNewInputStream(ctxt);
12920 if (inputStream == NULL) {
12921 xmlFreeParserInputBuffer(buf);
12922 return(1);
12923 }
12924
12925 if (filename == NULL)
12926 inputStream->filename = NULL;
12927 else
12928 inputStream->filename = (char *)
12929 xmlCanonicPath((const xmlChar *) filename);
12930 inputStream->buf = buf;
12931 inputStream->base = inputStream->buf->buffer->content;
12932 inputStream->cur = inputStream->buf->buffer->content;
12933 inputStream->end =
12934 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12935
12936 inputPush(ctxt, inputStream);
12937
12938 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12939 (ctxt->input->buf != NULL)) {
12940 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12941 int cur = ctxt->input->cur - ctxt->input->base;
12942
12943 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12944
12945 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12946 ctxt->input->cur = ctxt->input->base + cur;
12947 ctxt->input->end =
12948 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12949 use];
12950#ifdef DEBUG_PUSH
12951 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12952#endif
12953 }
12954
12955 if (encoding != NULL) {
12956 xmlCharEncodingHandlerPtr hdlr;
12957
12958 hdlr = xmlFindCharEncodingHandler(encoding);
12959 if (hdlr != NULL) {
12960 xmlSwitchToEncoding(ctxt, hdlr);
12961 } else {
12962 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12963 "Unsupported encoding %s\n", BAD_CAST encoding);
12964 }
12965 } else if (enc != XML_CHAR_ENCODING_NONE) {
12966 xmlSwitchEncoding(ctxt, enc);
12967 }
12968
12969 return(0);
12970}
12971
12972/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012973 * xmlCtxtUseOptions:
12974 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012975 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012976 *
12977 * Applies the options to the parser context
12978 *
12979 * Returns 0 in case of success, the set of unknown or unimplemented options
12980 * in case of error.
12981 */
12982int
12983xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12984{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012985 if (ctxt == NULL)
12986 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012987 if (options & XML_PARSE_RECOVER) {
12988 ctxt->recovery = 1;
12989 options -= XML_PARSE_RECOVER;
12990 } else
12991 ctxt->recovery = 0;
12992 if (options & XML_PARSE_DTDLOAD) {
12993 ctxt->loadsubset = XML_DETECT_IDS;
12994 options -= XML_PARSE_DTDLOAD;
12995 } else
12996 ctxt->loadsubset = 0;
12997 if (options & XML_PARSE_DTDATTR) {
12998 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12999 options -= XML_PARSE_DTDATTR;
13000 }
13001 if (options & XML_PARSE_NOENT) {
13002 ctxt->replaceEntities = 1;
13003 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13004 options -= XML_PARSE_NOENT;
13005 } else
13006 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013007 if (options & XML_PARSE_PEDANTIC) {
13008 ctxt->pedantic = 1;
13009 options -= XML_PARSE_PEDANTIC;
13010 } else
13011 ctxt->pedantic = 0;
13012 if (options & XML_PARSE_NOBLANKS) {
13013 ctxt->keepBlanks = 0;
13014 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13015 options -= XML_PARSE_NOBLANKS;
13016 } else
13017 ctxt->keepBlanks = 1;
13018 if (options & XML_PARSE_DTDVALID) {
13019 ctxt->validate = 1;
13020 if (options & XML_PARSE_NOWARNING)
13021 ctxt->vctxt.warning = NULL;
13022 if (options & XML_PARSE_NOERROR)
13023 ctxt->vctxt.error = NULL;
13024 options -= XML_PARSE_DTDVALID;
13025 } else
13026 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013027 if (options & XML_PARSE_NOWARNING) {
13028 ctxt->sax->warning = NULL;
13029 options -= XML_PARSE_NOWARNING;
13030 }
13031 if (options & XML_PARSE_NOERROR) {
13032 ctxt->sax->error = NULL;
13033 ctxt->sax->fatalError = NULL;
13034 options -= XML_PARSE_NOERROR;
13035 }
Daniel Veillard81273902003-09-30 00:43:48 +000013036#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013037 if (options & XML_PARSE_SAX1) {
13038 ctxt->sax->startElement = xmlSAX2StartElement;
13039 ctxt->sax->endElement = xmlSAX2EndElement;
13040 ctxt->sax->startElementNs = NULL;
13041 ctxt->sax->endElementNs = NULL;
13042 ctxt->sax->initialized = 1;
13043 options -= XML_PARSE_SAX1;
13044 }
Daniel Veillard81273902003-09-30 00:43:48 +000013045#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013046 if (options & XML_PARSE_NODICT) {
13047 ctxt->dictNames = 0;
13048 options -= XML_PARSE_NODICT;
13049 } else {
13050 ctxt->dictNames = 1;
13051 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013052 if (options & XML_PARSE_NOCDATA) {
13053 ctxt->sax->cdataBlock = NULL;
13054 options -= XML_PARSE_NOCDATA;
13055 }
13056 if (options & XML_PARSE_NSCLEAN) {
13057 ctxt->options |= XML_PARSE_NSCLEAN;
13058 options -= XML_PARSE_NSCLEAN;
13059 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013060 if (options & XML_PARSE_NONET) {
13061 ctxt->options |= XML_PARSE_NONET;
13062 options -= XML_PARSE_NONET;
13063 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013064 if (options & XML_PARSE_COMPACT) {
13065 ctxt->options |= XML_PARSE_COMPACT;
13066 options -= XML_PARSE_COMPACT;
13067 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013068 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013069 return (options);
13070}
13071
13072/**
13073 * xmlDoRead:
13074 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013075 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013076 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013077 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013078 * @reuse: keep the context for reuse
13079 *
13080 * Common front-end for the xmlRead functions
13081 *
13082 * Returns the resulting document tree or NULL
13083 */
13084static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013085xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13086 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013087{
13088 xmlDocPtr ret;
13089
13090 xmlCtxtUseOptions(ctxt, options);
13091 if (encoding != NULL) {
13092 xmlCharEncodingHandlerPtr hdlr;
13093
13094 hdlr = xmlFindCharEncodingHandler(encoding);
13095 if (hdlr != NULL)
13096 xmlSwitchToEncoding(ctxt, hdlr);
13097 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013098 if ((URL != NULL) && (ctxt->input != NULL) &&
13099 (ctxt->input->filename == NULL))
13100 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013101 xmlParseDocument(ctxt);
13102 if ((ctxt->wellFormed) || ctxt->recovery)
13103 ret = ctxt->myDoc;
13104 else {
13105 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013106 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013107 xmlFreeDoc(ctxt->myDoc);
13108 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013109 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013110 ctxt->myDoc = NULL;
13111 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013112 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013113 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013114
13115 return (ret);
13116}
13117
13118/**
13119 * xmlReadDoc:
13120 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013121 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013122 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013123 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013124 *
13125 * parse an XML in-memory document and build a tree.
13126 *
13127 * Returns the resulting document tree
13128 */
13129xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013130xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013131{
13132 xmlParserCtxtPtr ctxt;
13133
13134 if (cur == NULL)
13135 return (NULL);
13136
13137 ctxt = xmlCreateDocParserCtxt(cur);
13138 if (ctxt == NULL)
13139 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013140 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013141}
13142
13143/**
13144 * xmlReadFile:
13145 * @filename: a file or URL
13146 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013147 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013148 *
13149 * parse an XML file from the filesystem or the network.
13150 *
13151 * Returns the resulting document tree
13152 */
13153xmlDocPtr
13154xmlReadFile(const char *filename, const char *encoding, int options)
13155{
13156 xmlParserCtxtPtr ctxt;
13157
Daniel Veillard61b93382003-11-03 14:28:31 +000013158 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013159 if (ctxt == NULL)
13160 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013161 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013162}
13163
13164/**
13165 * xmlReadMemory:
13166 * @buffer: a pointer to a char array
13167 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013168 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013169 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013170 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013171 *
13172 * parse an XML in-memory document and build a tree.
13173 *
13174 * Returns the resulting document tree
13175 */
13176xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013177xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013178{
13179 xmlParserCtxtPtr ctxt;
13180
13181 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13182 if (ctxt == NULL)
13183 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013184 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013185}
13186
13187/**
13188 * xmlReadFd:
13189 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013190 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013191 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013192 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013193 *
13194 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013195 * NOTE that the file descriptor will not be closed when the
13196 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013197 *
13198 * Returns the resulting document tree
13199 */
13200xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013201xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013202{
13203 xmlParserCtxtPtr ctxt;
13204 xmlParserInputBufferPtr input;
13205 xmlParserInputPtr stream;
13206
13207 if (fd < 0)
13208 return (NULL);
13209
13210 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13211 if (input == NULL)
13212 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013213 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013214 ctxt = xmlNewParserCtxt();
13215 if (ctxt == NULL) {
13216 xmlFreeParserInputBuffer(input);
13217 return (NULL);
13218 }
13219 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13220 if (stream == NULL) {
13221 xmlFreeParserInputBuffer(input);
13222 xmlFreeParserCtxt(ctxt);
13223 return (NULL);
13224 }
13225 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013226 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013227}
13228
13229/**
13230 * xmlReadIO:
13231 * @ioread: an I/O read function
13232 * @ioclose: an I/O close function
13233 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013234 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013235 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013236 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013237 *
13238 * parse an XML document from I/O functions and source and build a tree.
13239 *
13240 * Returns the resulting document tree
13241 */
13242xmlDocPtr
13243xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013244 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013245{
13246 xmlParserCtxtPtr ctxt;
13247 xmlParserInputBufferPtr input;
13248 xmlParserInputPtr stream;
13249
13250 if (ioread == NULL)
13251 return (NULL);
13252
13253 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13254 XML_CHAR_ENCODING_NONE);
13255 if (input == NULL)
13256 return (NULL);
13257 ctxt = xmlNewParserCtxt();
13258 if (ctxt == NULL) {
13259 xmlFreeParserInputBuffer(input);
13260 return (NULL);
13261 }
13262 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13263 if (stream == NULL) {
13264 xmlFreeParserInputBuffer(input);
13265 xmlFreeParserCtxt(ctxt);
13266 return (NULL);
13267 }
13268 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013269 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013270}
13271
13272/**
13273 * xmlCtxtReadDoc:
13274 * @ctxt: an XML parser context
13275 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013276 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013277 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013278 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013279 *
13280 * parse an XML in-memory document and build a tree.
13281 * This reuses the existing @ctxt parser context
13282 *
13283 * Returns the resulting document tree
13284 */
13285xmlDocPtr
13286xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013287 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013288{
13289 xmlParserInputPtr stream;
13290
13291 if (cur == NULL)
13292 return (NULL);
13293 if (ctxt == NULL)
13294 return (NULL);
13295
13296 xmlCtxtReset(ctxt);
13297
13298 stream = xmlNewStringInputStream(ctxt, cur);
13299 if (stream == NULL) {
13300 return (NULL);
13301 }
13302 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013303 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013304}
13305
13306/**
13307 * xmlCtxtReadFile:
13308 * @ctxt: an XML parser context
13309 * @filename: a file or URL
13310 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013311 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013312 *
13313 * parse an XML file from the filesystem or the network.
13314 * This reuses the existing @ctxt parser context
13315 *
13316 * Returns the resulting document tree
13317 */
13318xmlDocPtr
13319xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13320 const char *encoding, int options)
13321{
13322 xmlParserInputPtr stream;
13323
13324 if (filename == NULL)
13325 return (NULL);
13326 if (ctxt == NULL)
13327 return (NULL);
13328
13329 xmlCtxtReset(ctxt);
13330
Daniel Veillard29614c72004-11-26 10:47:26 +000013331 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013332 if (stream == NULL) {
13333 return (NULL);
13334 }
13335 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013336 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013337}
13338
13339/**
13340 * xmlCtxtReadMemory:
13341 * @ctxt: an XML parser context
13342 * @buffer: a pointer to a char array
13343 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013344 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013345 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013346 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013347 *
13348 * parse an XML in-memory document and build a tree.
13349 * This reuses the existing @ctxt parser context
13350 *
13351 * Returns the resulting document tree
13352 */
13353xmlDocPtr
13354xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013355 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013356{
13357 xmlParserInputBufferPtr input;
13358 xmlParserInputPtr stream;
13359
13360 if (ctxt == NULL)
13361 return (NULL);
13362 if (buffer == NULL)
13363 return (NULL);
13364
13365 xmlCtxtReset(ctxt);
13366
13367 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13368 if (input == NULL) {
13369 return(NULL);
13370 }
13371
13372 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13373 if (stream == NULL) {
13374 xmlFreeParserInputBuffer(input);
13375 return(NULL);
13376 }
13377
13378 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013379 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013380}
13381
13382/**
13383 * xmlCtxtReadFd:
13384 * @ctxt: an XML parser context
13385 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013386 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013387 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013388 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013389 *
13390 * parse an XML from a file descriptor and build a tree.
13391 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013392 * NOTE that the file descriptor will not be closed when the
13393 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013394 *
13395 * Returns the resulting document tree
13396 */
13397xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013398xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13399 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013400{
13401 xmlParserInputBufferPtr input;
13402 xmlParserInputPtr stream;
13403
13404 if (fd < 0)
13405 return (NULL);
13406 if (ctxt == NULL)
13407 return (NULL);
13408
13409 xmlCtxtReset(ctxt);
13410
13411
13412 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13413 if (input == NULL)
13414 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013415 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013416 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13417 if (stream == NULL) {
13418 xmlFreeParserInputBuffer(input);
13419 return (NULL);
13420 }
13421 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013422 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013423}
13424
13425/**
13426 * xmlCtxtReadIO:
13427 * @ctxt: an XML parser context
13428 * @ioread: an I/O read function
13429 * @ioclose: an I/O close function
13430 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013431 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013432 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013433 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013434 *
13435 * parse an XML document from I/O functions and source and build a tree.
13436 * This reuses the existing @ctxt parser context
13437 *
13438 * Returns the resulting document tree
13439 */
13440xmlDocPtr
13441xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13442 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013443 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013444 const char *encoding, int options)
13445{
13446 xmlParserInputBufferPtr input;
13447 xmlParserInputPtr stream;
13448
13449 if (ioread == NULL)
13450 return (NULL);
13451 if (ctxt == NULL)
13452 return (NULL);
13453
13454 xmlCtxtReset(ctxt);
13455
13456 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13457 XML_CHAR_ENCODING_NONE);
13458 if (input == NULL)
13459 return (NULL);
13460 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13461 if (stream == NULL) {
13462 xmlFreeParserInputBuffer(input);
13463 return (NULL);
13464 }
13465 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013466 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013467}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013468
13469#define bottom_parser
13470#include "elfgcchack.h"