blob: 8ffe2c22efd94841fe19f1341f4c1f3849657e50 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000083/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000084 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000085 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
Owen Taylor3473f882001-02-23 17:55:21 +000099/*
Owen Taylor3473f882001-02-23 17:55:21 +0000100 * List of XML prefixed PI allowed by W3C specs
101 */
102
Daniel Veillardb44025c2001-10-11 22:55:55 +0000103static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000104 "xml-stylesheet",
105 NULL
106};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
 * Returns a non-zero value if the feature exists, or zero (0) if the
 * feature does not exist or an unknown feature is requested.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
Daniel Veillard4432df22003-09-28 18:58:27 +0000951/**
952 * xmlCheckLanguageID:
953 * @lang: pointer to the string value
954 *
955 * Checks that the value conforms to the LanguageID production:
956 *
957 * NOTE: this is somewhat deprecated, those productions were removed from
958 * the XML Second edition.
959 *
960 * [33] LanguageID ::= Langcode ('-' Subcode)*
961 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
962 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
963 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
964 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
965 * [38] Subcode ::= ([a-z] | [A-Z])+
966 *
967 * Returns 1 if correct 0 otherwise
968 **/
969int
970xmlCheckLanguageID(const xmlChar * lang)
971{
972 const xmlChar *cur = lang;
973
974 if (cur == NULL)
975 return (0);
976 if (((cur[0] == 'i') && (cur[1] == '-')) ||
977 ((cur[0] == 'I') && (cur[1] == '-'))) {
978 /*
979 * IANA code
980 */
981 cur += 2;
982 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
983 ((cur[0] >= 'a') && (cur[0] <= 'z')))
984 cur++;
985 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
986 ((cur[0] == 'X') && (cur[1] == '-'))) {
987 /*
988 * User code
989 */
990 cur += 2;
991 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
992 ((cur[0] >= 'a') && (cur[0] <= 'z')))
993 cur++;
994 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
995 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
996 /*
997 * ISO639
998 */
999 cur++;
1000 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1001 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1002 cur++;
1003 else
1004 return (0);
1005 } else
1006 return (0);
1007 while (cur[0] != 0) { /* non input consuming */
1008 if (cur[0] != '-')
1009 return (0);
1010 cur++;
1011 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1012 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1013 cur++;
1014 else
1015 return (0);
1016 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1017 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1018 cur++;
1019 }
1020 return (1);
1021}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
/*
 * Macros for accessing the content. These should be used only by the
 * parser, and are not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 *                context in order to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location!
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character, which is l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), and sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1459
/*
 * Raw accessors on the current input. They expand to expressions using a
 * variable named `ctxt`, which must be in scope at the point of use.
 *   RAW / CUR   the byte at the current input position
 *   NXT(val)    the byte `val` positions after the current one
 *   CUR_PTR     the current input pointer itself
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): non-zero when the n bytes starting at `s` are
 * exactly c1..cn. Every argument is parenthesized in the expansion so that
 * an expression such as `p + 1` is cast as a whole before being indexed.
 * `s` is evaluated once per compared byte, so it must be side-effect free.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1482
/*
 * SKIP(val): advance the current input by `val` bytes, adding `val` to
 * ctxt->nbChars and to the input column. The line counter is not touched,
 * so this is only for runs without newlines. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and if the position
 * holds a 0 byte and the input cannot be grown the input is popped.
 * `val` is evaluated three times: pass a side-effect free expression.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks byte by byte so that newlines update
 * the line counter and reset the column to 1. The loop bound is
 * parenthesized so that any expression can be passed as `val`; it is
 * re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1505
Daniel Veillarda880b122003-04-21 21:36:41 +00001506#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001507 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1508 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001509 xmlSHRINK (ctxt);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
Daniel Veillarda880b122003-04-21 21:36:41 +00001518#define GROW if ((ctxt->progressive == 0) && \
1519 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001520 xmlGROW (ctxt);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
/* SKIP_BLANKS: skip blanks at the current position (xmlSkipBlankChars). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column and ctxt->nbChars;
 * the line counter is not touched. When the new position holds a 0 byte
 * the input is asked to grow. Expands to a brace block, not a statement
 * expression.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is `l` bytes long. A newline
 * increments the line and resets the column to 1, anything else increments
 * the column by one. A '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* CUR_CHAR(l): current character of the input; its byte length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): same, reading from the string s instead of the input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character `v` to buffer `b` at index `i`,
 * advancing `i`. When l == 1 the value is stored as a single byte,
 * otherwise it is encoded with xmlCopyCharMultiByte(). All arguments are
 * parenthesized in the expansion. The macro deliberately stays a bare
 * if/else without a trailing ';' so existing call sites keep compiling.
 * `i` must be a modifiable lvalue; `b` and `i` are evaluated in one
 * branch only.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
/*
 * growBuffer(buffer): double the capacity of the xmlChar array `buffer`.
 *
 * The enclosing function must provide an int variable named
 * <buffer>_size holding the capacity, and a `mem_error` label. The size
 * variable is doubled first; if xmlRealloc() then fails, control jumps to
 * mem_error with `buffer` still pointing at the old allocation.
 */
#define growBuffer(buffer) {						\
    xmlChar *grown;							\
    buffer##_size *= 2;							\
    grown = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (grown == NULL) goto mem_error;					\
    buffer = grown;							\
}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002180 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2181 growBuffer(buffer);
2182 }
Owen Taylor3473f882001-02-23 17:55:21 +00002183 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2184 if (xmlParserDebugEntities)
2185 xmlGenericError(xmlGenericErrorContext,
2186 "String decoding Entity Reference: %.30s\n",
2187 str);
2188 ent = xmlParseStringEntityRef(ctxt, &str);
2189 if ((ent != NULL) &&
2190 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2191 if (ent->content != NULL) {
2192 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002193 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2194 growBuffer(buffer);
2195 }
Owen Taylor3473f882001-02-23 17:55:21 +00002196 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002197 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2198 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002199 }
2200 } else if ((ent != NULL) && (ent->content != NULL)) {
2201 xmlChar *rep;
2202
2203 ctxt->depth++;
2204 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2205 0, 0, 0);
2206 ctxt->depth--;
2207 if (rep != NULL) {
2208 current = rep;
2209 while (*current != 0) { /* non input consuming loop */
2210 buffer[nbchars++] = *current++;
2211 if (nbchars >
2212 buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 }
2216 xmlFree(rep);
2217 }
2218 } else if (ent != NULL) {
2219 int i = xmlStrlen(ent->name);
2220 const xmlChar *cur = ent->name;
2221
2222 buffer[nbchars++] = '&';
2223 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2224 growBuffer(buffer);
2225 }
2226 for (;i > 0;i--)
2227 buffer[nbchars++] = *cur++;
2228 buffer[nbchars++] = ';';
2229 }
2230 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
2233 "String decoding PE Reference: %.30s\n", str);
2234 ent = xmlParseStringPEReference(ctxt, &str);
2235 if (ent != NULL) {
2236 xmlChar *rep;
2237
2238 ctxt->depth++;
2239 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2240 0, 0, 0);
2241 ctxt->depth--;
2242 if (rep != NULL) {
2243 current = rep;
2244 while (*current != 0) { /* non input consuming loop */
2245 buffer[nbchars++] = *current++;
2246 if (nbchars >
2247 buffer_size - XML_PARSER_BUFFER_SIZE) {
2248 growBuffer(buffer);
2249 }
2250 }
2251 xmlFree(rep);
2252 }
2253 }
2254 } else {
2255 COPY_BUF(l,buffer,nbchars,c);
2256 str += l;
2257 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2258 growBuffer(buffer);
2259 }
2260 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002261 if (str < last)
2262 c = CUR_SCHAR(str, l);
2263 else
2264 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266 buffer[nbchars++] = 0;
2267 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002268
2269mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272}
2273
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274/**
2275 * xmlStringDecodeEntities:
2276 * @ctxt: the parser context
2277 * @str: the input string
2278 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2279 * @end: an end marker xmlChar, 0 if none
2280 * @end2: an end marker xmlChar, 0 if none
2281 * @end3: an end marker xmlChar, 0 if none
2282 *
2283 * Takes a entity string content and process to do the adequate substitutions.
2284 *
2285 * [67] Reference ::= EntityRef | CharRef
2286 *
2287 * [69] PEReference ::= '%' Name ';'
2288 *
2289 * Returns A newly allocated string with the substitution done. The caller
2290 * must deallocate it !
2291 */
2292xmlChar *
2293xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2294 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002295 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002296 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2297 end, end2, end3));
2298}
Owen Taylor3473f882001-02-23 17:55:21 +00002299
2300/************************************************************************
2301 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002302 * Commodity functions, cleanup needed ? *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * areBlanks:
2308 * @ctxt: an XML parser context
2309 * @str: a xmlChar *
2310 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002311 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002312 *
2313 * Is this a sequence of blank chars that one can ignore ?
2314 *
2315 * Returns 1 if ignorable 0 otherwise.
2316 */
2317
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002318static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 int i, ret;
2321 xmlNodePtr lastChild;
2322
Daniel Veillard05c13a22001-09-09 08:38:09 +00002323 /*
2324 * Don't spend time trying to differentiate them, the same callback is
2325 * used !
2326 */
2327 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002328 return(0);
2329
Owen Taylor3473f882001-02-23 17:55:21 +00002330 /*
2331 * Check for xml:space value.
2332 */
2333 if (*(ctxt->space) == 1)
2334 return(0);
2335
2336 /*
2337 * Check that the string is made of blanks
2338 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002339 if (blank_chars == 0) {
2340 for (i = 0;i < len;i++)
2341 if (!(IS_BLANK_CH(str[i]))) return(0);
2342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343
2344 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002345 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002346 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002347 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (ctxt->myDoc != NULL) {
2349 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2350 if (ret == 0) return(1);
2351 if (ret == 1) return(0);
2352 }
2353
2354 /*
2355 * Otherwise, heuristic :-\
2356 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002357 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if ((ctxt->node->children == NULL) &&
2359 (RAW == '<') && (NXT(1) == '/')) return(0);
2360
2361 lastChild = xmlGetLastChild(ctxt->node);
2362 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002363 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2364 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if (xmlNodeIsText(lastChild))
2366 return(0);
2367 else if ((ctxt->node->children != NULL) &&
2368 (xmlNodeIsText(ctxt->node->children)))
2369 return(0);
2370 return(1);
2371}
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373/************************************************************************
2374 * *
2375 * Extra stuff for namespace support *
2376 * Relates to http://www.w3.org/TR/WD-xml-names *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * xmlSplitQName:
2382 * @ctxt: an XML parser context
2383 * @name: an XML parser context
2384 * @prefix: a xmlChar **
2385 *
2386 * parse an UTF8 encoded XML qualified name string
2387 *
2388 * [NS 5] QName ::= (Prefix ':')? LocalPart
2389 *
2390 * [NS 6] Prefix ::= NCName
2391 *
2392 * [NS 7] LocalPart ::= NCName
2393 *
2394 * Returns the local part, and prefix is updated
2395 * to get the Prefix if any.
2396 */
2397
2398xmlChar *
2399xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2400 xmlChar buf[XML_MAX_NAMELEN + 5];
2401 xmlChar *buffer = NULL;
2402 int len = 0;
2403 int max = XML_MAX_NAMELEN;
2404 xmlChar *ret = NULL;
2405 const xmlChar *cur = name;
2406 int c;
2407
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002408 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 *prefix = NULL;
2410
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002411 if (cur == NULL) return(NULL);
2412
Owen Taylor3473f882001-02-23 17:55:21 +00002413#ifndef XML_XML_NAMESPACE
2414 /* xml: prefix is not really a namespace */
2415 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2416 (cur[2] == 'l') && (cur[3] == ':'))
2417 return(xmlStrdup(name));
2418#endif
2419
Daniel Veillard597bc482003-07-24 16:08:28 +00002420 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if (cur[0] == ':')
2422 return(xmlStrdup(name));
2423
2424 c = *cur++;
2425 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2426 buf[len++] = c;
2427 c = *cur++;
2428 }
2429 if (len >= max) {
2430 /*
2431 * Okay someone managed to make a huge name, so he's ready to pay
2432 * for the processing speed.
2433 */
2434 max = len * 2;
2435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441 memcpy(buffer, buf, len);
2442 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2443 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002444 xmlChar *tmp;
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002447 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002448 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002449 if (tmp == NULL) {
2450 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(NULL);
2453 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002454 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 }
2456 buffer[len++] = c;
2457 c = *cur++;
2458 }
2459 buffer[len] = 0;
2460 }
2461
Daniel Veillard597bc482003-07-24 16:08:28 +00002462 /* nasty but well=formed
2463 if ((c == ':') && (*cur == 0)) {
2464 return(xmlStrdup(name));
2465 } */
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buffer == NULL)
2468 ret = xmlStrndup(buf, len);
2469 else {
2470 ret = buffer;
2471 buffer = NULL;
2472 max = XML_MAX_NAMELEN;
2473 }
2474
2475
2476 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002477 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002480 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002481 }
Owen Taylor3473f882001-02-23 17:55:21 +00002482 len = 0;
2483
Daniel Veillardbb284f42002-10-16 18:02:47 +00002484 /*
2485 * Check that the first character is proper to start
2486 * a new name
2487 */
2488 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2489 ((c >= 0x41) && (c <= 0x5A)) ||
2490 (c == '_') || (c == ':'))) {
2491 int l;
2492 int first = CUR_SCHAR(cur, l);
2493
2494 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002495 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002496 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002497 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002498 }
2499 }
2500 cur++;
2501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2503 buf[len++] = c;
2504 c = *cur++;
2505 }
2506 if (len >= max) {
2507 /*
2508 * Okay someone managed to make a huge name, so he's ready to pay
2509 * for the processing speed.
2510 */
2511 max = len * 2;
2512
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(NULL);
2517 }
2518 memcpy(buffer, buf, len);
2519 while (c != 0) { /* tested bigname2.xml */
2520 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002521 xmlChar *tmp;
2522
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002571 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002625 register const xmlChar *cmp = other;
2626 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002635 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002636 }
William M. Brack76e95df2003-10-18 16:20:14 +00002637 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638 /* success */
2639 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 /* failure (or end of input buffer), check with full function */
2643 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644 /* strings coming from the dictionnary direct compare possible */
2645 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002646 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647 }
2648 return ret;
2649}
2650
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002651static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002653 int len = 0, l;
2654 int c;
2655 int count = 0;
2656
2657 /*
2658 * Handler for more complex cases
2659 */
2660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 c = CUR_CHAR(l);
2662 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2663 (!IS_LETTER(c) && (c != '_') &&
2664 (c != ':'))) {
2665 return(NULL);
2666 }
2667
2668 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002669 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002670 (c == '.') || (c == '-') ||
2671 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002672 (IS_COMBINING(c)) ||
2673 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (count++ > 100) {
2675 count = 0;
2676 GROW;
2677 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 NEXTL(l);
2680 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard96688262005-08-23 18:14:12 +00002682 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2683 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002685}
2686
2687/**
2688 * xmlParseStringName:
2689 * @ctxt: an XML parser context
2690 * @str: a pointer to the string pointer (IN/OUT)
2691 *
2692 * parse an XML name.
2693 *
2694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2695 * CombiningChar | Extender
2696 *
2697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2698 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002699 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002702 * is updated to the current location in the string.
2703 */
2704
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002705static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002706xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2707 xmlChar buf[XML_MAX_NAMELEN + 5];
2708 const xmlChar *cur = *str;
2709 int len = 0, l;
2710 int c;
2711
2712 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002713 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002714 (c != ':')) {
2715 return(NULL);
2716 }
2717
William M. Brack871611b2003-10-18 04:53:14 +00002718 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002719 (c == '.') || (c == '-') ||
2720 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002721 (IS_COMBINING(c)) ||
2722 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723 COPY_BUF(l,buf,len,c);
2724 cur += l;
2725 c = CUR_SCHAR(cur, l);
2726 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2727 /*
2728 * Okay someone managed to make a huge name, so he's ready to pay
2729 * for the processing speed.
2730 */
2731 xmlChar *buffer;
2732 int max = len * 2;
2733
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002734 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002735 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(NULL);
2738 }
2739 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002741 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002742 (c == '.') || (c == '-') ||
2743 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002744 (IS_COMBINING(c)) ||
2745 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002749 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002750 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002753 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
2755 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 COPY_BUF(l,buffer,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 }
2762 buffer[len] = 0;
2763 *str = cur;
2764 return(buffer);
2765 }
2766 }
2767 *str = cur;
2768 return(xmlStrndup(buf, len));
2769}
2770
2771/**
2772 * xmlParseNmtoken:
2773 * @ctxt: an XML parser context
2774 *
2775 * parse an XML Nmtoken.
2776 *
2777 * [7] Nmtoken ::= (NameChar)+
2778 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002779 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002780 *
2781 * Returns the Nmtoken parsed or NULL
2782 */
2783
2784xmlChar *
2785xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2786 xmlChar buf[XML_MAX_NAMELEN + 5];
2787 int len = 0, l;
2788 int c;
2789 int count = 0;
2790
2791 GROW;
2792 c = CUR_CHAR(l);
2793
William M. Brack871611b2003-10-18 04:53:14 +00002794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 (c == '.') || (c == '-') ||
2796 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002797 (IS_COMBINING(c)) ||
2798 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 if (count++ > 100) {
2800 count = 0;
2801 GROW;
2802 }
2803 COPY_BUF(l,buf,len,c);
2804 NEXTL(l);
2805 c = CUR_CHAR(l);
2806 if (len >= XML_MAX_NAMELEN) {
2807 /*
2808 * Okay someone managed to make a huge token, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlChar *tmp;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002838 return(NULL);
2839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 }
2842 COPY_BUF(l,buffer,len,c);
2843 NEXTL(l);
2844 c = CUR_CHAR(l);
2845 }
2846 buffer[len] = 0;
2847 return(buffer);
2848 }
2849 }
2850 if (len == 0)
2851 return(NULL);
2852 return(xmlStrndup(buf, len));
2853}
2854
2855/**
2856 * xmlParseEntityValue:
2857 * @ctxt: an XML parser context
2858 * @orig: if non-NULL store a copy of the original entity value
2859 *
2860 * parse a value for ENTITY declarations
2861 *
2862 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2863 * "'" ([^%&'] | PEReference | Reference)* "'"
2864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002865 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002866 */
2867
2868xmlChar *
2869xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2870 xmlChar *buf = NULL;
2871 int len = 0;
2872 int size = XML_PARSER_BUFFER_SIZE;
2873 int c, l;
2874 xmlChar stop;
2875 xmlChar *ret = NULL;
2876 const xmlChar *cur = NULL;
2877 xmlParserInputPtr input;
2878
2879 if (RAW == '"') stop = '"';
2880 else if (RAW == '\'') stop = '\'';
2881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(NULL);
2884 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890
2891 /*
2892 * The content of the entity definition is copied in a buffer.
2893 */
2894
2895 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2896 input = ctxt->input;
2897 GROW;
2898 NEXT;
2899 c = CUR_CHAR(l);
2900 /*
2901 * NOTE: 4.4.5 Included in Literal
2902 * When a parameter entity reference appears in a literal entity
2903 * value, ... a single or double quote character in the replacement
2904 * text is always treated as a normal data character and will not
2905 * terminate the literal.
2906 * In practice it means we stop the loop only when back at parsing
2907 * the initial entity and the quote is found
2908 */
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 (ctxt->input != input))) {
2911 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlChar *tmp;
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002918 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002919 return(NULL);
2920 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002921 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
2923 COPY_BUF(l,buf,len,c);
2924 NEXTL(l);
2925 /*
2926 * Pop-up of finished entities.
2927 */
2928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2929 xmlPopInput(ctxt);
2930
2931 GROW;
2932 c = CUR_CHAR(l);
2933 if (c == 0) {
2934 GROW;
2935 c = CUR_CHAR(l);
2936 }
2937 }
2938 buf[len] = 0;
2939
2940 /*
2941 * Raise problem w.r.t. '&' and '%' being used in non-entities
2942 * reference constructs. Note Charref will be handled in
2943 * xmlStringDecodeEntities()
2944 */
2945 cur = buf;
2946 while (*cur != 0) { /* non input consuming */
2947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2948 xmlChar *name;
2949 xmlChar tmp = *cur;
2950
2951 cur++;
2952 name = xmlParseStringName(ctxt, &cur);
2953 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002956 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002958 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2959 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 }
2962 if (name != NULL)
2963 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002964 if (*cur == 0)
2965 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 cur++;
2968 }
2969
2970 /*
2971 * Then PEReference entities are substituted.
2972 */
2973 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002974 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002975 xmlFree(buf);
2976 } else {
2977 NEXT;
2978 /*
2979 * NOTE: 4.4.7 Bypassed
2980 * When a general entity reference appears in the EntityValue in
2981 * an entity declaration, it is bypassed and left as is.
2982 * so XML_SUBSTITUTE_REF is not set here.
2983 */
2984 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2985 0, 0, 0);
2986 if (orig != NULL)
2987 *orig = buf;
2988 else
2989 xmlFree(buf);
2990 }
2991
2992 return(ret);
2993}
2994
2995/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 * xmlParseAttValueComplex:
2997 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002998 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002999 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * parse a value for an attribute, this is the fallback function
3002 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003003 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003009 xmlChar limit = 0;
3010 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 int len = 0;
3012 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 xmlChar *current = NULL;
3015 xmlEntityPtr ent;
3016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (NXT(0) == '"') {
3018 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3019 limit = '"';
3020 NEXT;
3021 } else if (NXT(0) == '\'') {
3022 limit = '\'';
3023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3024 NEXT;
3025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029
3030 /*
3031 * allocate a translation buffer.
3032 */
3033 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003034 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003036
3037 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003038 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003039 */
3040 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003041 while ((NXT(0) != limit) && /* checked */
3042 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003044 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003045 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (NXT(1) == '#') {
3047 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003050 if (ctxt->replaceEntities) {
3051 if (len > buf_size - 10) {
3052 growBuffer(buf);
3053 }
3054 buf[len++] = '&';
3055 } else {
3056 /*
3057 * The reparsing will be done in xmlStringGetNodeList()
3058 * called by the attribute() function in SAX.c
3059 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003060 if (len > buf_size - 10) {
3061 growBuffer(buf);
3062 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003063 buf[len++] = '&';
3064 buf[len++] = '#';
3065 buf[len++] = '3';
3066 buf[len++] = '8';
3067 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003070 if (len > buf_size - 10) {
3071 growBuffer(buf);
3072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len += xmlCopyChar(0, &buf[len], val);
3074 }
3075 } else {
3076 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003077 if ((ent != NULL) &&
3078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3079 if (len > buf_size - 10) {
3080 growBuffer(buf);
3081 }
3082 if ((ctxt->replaceEntities == 0) &&
3083 (ent->content[0] == '&')) {
3084 buf[len++] = '&';
3085 buf[len++] = '#';
3086 buf[len++] = '3';
3087 buf[len++] = '8';
3088 buf[len++] = ';';
3089 } else {
3090 buf[len++] = ent->content[0];
3091 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 } else if ((ent != NULL) &&
3093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 xmlChar *rep;
3095
3096 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3097 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 XML_SUBSTITUTE_REF,
3099 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (rep != NULL) {
3101 current = rep;
3102 while (*current != 0) { /* non input consuming */
3103 buf[len++] = *current++;
3104 if (len > buf_size - 10) {
3105 growBuffer(buf);
3106 }
3107 }
3108 xmlFree(rep);
3109 }
3110 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (ent->content != NULL)
3115 buf[len++] = ent->content[0];
3116 }
3117 } else if (ent != NULL) {
3118 int i = xmlStrlen(ent->name);
3119 const xmlChar *cur = ent->name;
3120
3121 /*
3122 * This may look absurd but is needed to detect
3123 * entities problems
3124 */
3125 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3126 (ent->content != NULL)) {
3127 xmlChar *rep;
3128 rep = xmlStringDecodeEntities(ctxt, ent->content,
3129 XML_SUBSTITUTE_REF, 0, 0, 0);
3130 if (rep != NULL)
3131 xmlFree(rep);
3132 }
3133
3134 /*
3135 * Just output the reference
3136 */
3137 buf[len++] = '&';
3138 if (len > buf_size - i - 10) {
3139 growBuffer(buf);
3140 }
3141 for (;i > 0;i--)
3142 buf[len++] = *cur++;
3143 buf[len++] = ';';
3144 }
3145 }
3146 } else {
3147 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003148 if ((len != 0) || (!normalize)) {
3149 if ((!normalize) || (!in_space)) {
3150 COPY_BUF(l,buf,len,0x20);
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 }
3155 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 }
3157 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003158 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003159 COPY_BUF(l,buf,len,c);
3160 if (len > buf_size - 10) {
3161 growBuffer(buf);
3162 }
3163 }
3164 NEXTL(l);
3165 }
3166 GROW;
3167 c = CUR_CHAR(l);
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 if ((in_space) && (normalize)) {
3170 while (buf[len - 1] == 0x20) len--;
3171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003172 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003174 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003175 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003176 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3177 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003178 } else
3179 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003180 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003182
3183mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003186}
3187
3188/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003189 * xmlParseAttValue:
3190 * @ctxt: an XML parser context
3191 *
3192 * parse a value for an attribute
3193 * Note: the parser won't do substitution of entities here, this
3194 * will be handled later in xmlStringGetNodeList
3195 *
3196 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3197 * "'" ([^<&'] | Reference)* "'"
3198 *
3199 * 3.3.3 Attribute-Value Normalization:
3200 * Before the value of an attribute is passed to the application or
3201 * checked for validity, the XML processor must normalize it as follows:
3202 * - a character reference is processed by appending the referenced
3203 * character to the attribute value
3204 * - an entity reference is processed by recursively processing the
3205 * replacement text of the entity
3206 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3207 * appending #x20 to the normalized value, except that only a single
3208 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3209 * parsed entity or the literal entity value of an internal parsed entity
3210 * - other characters are processed by appending them to the normalized value
3211 * If the declared value is not CDATA, then the XML processor must further
3212 * process the normalized attribute value by discarding any leading and
3213 * trailing space (#x20) characters, and by replacing sequences of space
3214 * (#x20) characters by a single space (#x20) character.
3215 * All attributes for which no declaration has been read should be treated
3216 * by a non-validating parser as if declared CDATA.
3217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
3220
3221
3222xmlChar *
3223xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003224 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003226}
3227
3228/**
Owen Taylor3473f882001-02-23 17:55:21 +00003229 * xmlParseSystemLiteral:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse an XML Literal
3233 *
3234 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3235 *
3236 * Returns the SystemLiteral parsed or NULL
3237 */
3238
3239xmlChar *
3240xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3241 xmlChar *buf = NULL;
3242 int len = 0;
3243 int size = XML_PARSER_BUFFER_SIZE;
3244 int cur, l;
3245 xmlChar stop;
3246 int state = ctxt->instate;
3247 int count = 0;
3248
3249 SHRINK;
3250 if (RAW == '"') {
3251 NEXT;
3252 stop = '"';
3253 } else if (RAW == '\'') {
3254 NEXT;
3255 stop = '\'';
3256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 return(NULL);
3265 }
3266 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3267 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003268 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003269 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003270 xmlChar *tmp;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003273 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3274 if (tmp == NULL) {
3275 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003276 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003277 ctxt->instate = (xmlParserInputState) state;
3278 return(NULL);
3279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 count++;
3283 if (count > 50) {
3284 GROW;
3285 count = 0;
3286 }
3287 COPY_BUF(l,buf,len,cur);
3288 NEXTL(l);
3289 cur = CUR_CHAR(l);
3290 if (cur == 0) {
3291 GROW;
3292 SHRINK;
3293 cur = CUR_CHAR(l);
3294 }
3295 }
3296 buf[len] = 0;
3297 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003298 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 } else {
3301 NEXT;
3302 }
3303 return(buf);
3304}
3305
3306/**
3307 * xmlParsePubidLiteral:
3308 * @ctxt: an XML parser context
3309 *
3310 * parse an XML public literal
3311 *
3312 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3313 *
3314 * Returns the PubidLiteral parsed or NULL.
3315 */
3316
3317xmlChar *
3318xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3319 xmlChar *buf = NULL;
3320 int len = 0;
3321 int size = XML_PARSER_BUFFER_SIZE;
3322 xmlChar cur;
3323 xmlChar stop;
3324 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003325 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003326
3327 SHRINK;
3328 if (RAW == '"') {
3329 NEXT;
3330 stop = '"';
3331 } else if (RAW == '\'') {
3332 NEXT;
3333 stop = '\'';
3334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 return(NULL);
3337 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003338 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 return(NULL);
3342 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003343 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003345 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlChar *tmp;
3348
Owen Taylor3473f882001-02-23 17:55:21 +00003349 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003352 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return(NULL);
3355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 buf[len++] = cur;
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 NEXT;
3365 cur = CUR;
3366 if (cur == 0) {
3367 GROW;
3368 SHRINK;
3369 cur = CUR;
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 } else {
3376 NEXT;
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(buf);
3380}
3381
Daniel Veillard48b2f892001-02-25 16:11:03 +00003382void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003383
/*
 * Lookup table driving the inner loop of the character data fast path.
 * A non-zero entry means the byte can be skipped over directly; the entry
 * then holds the byte value itself.  Zero entries are: the control
 * characters other than 0x9 (CR and LF are handled separately by the
 * caller), '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x9) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, always rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3421
Owen Taylor3473f882001-02-23 17:55:21 +00003422/**
3423 * xmlParseCharData:
3424 * @ctxt: an XML parser context
3425 * @cdata: int indicating whether we are within a CDATA section
3426 *
3427 * parse a CharData section.
3428 * if we are within a CDATA section ']]>' marks an end of section.
3429 *
3430 * The right angle bracket (>) may be represented using the string "&gt;",
3431 * and must, for compatibility, be escaped using "&gt;" or a character
3432 * reference when it appears in the string "]]>" in content, when that
3433 * string is not marking the end of a CDATA section.
3434 *
3435 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3436 */
3437
3438void
3439xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003440 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003442 int line = ctxt->input->line;
3443 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003444 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445
3446 SHRINK;
3447 GROW;
3448 /*
3449 * Accelerated common case where input don't need to be
3450 * modified before passing it to the handler.
3451 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003452 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in = ctxt->input->cur;
3454 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003455get_more_space:
3456 while (*in == 0x20) in++;
3457 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003458 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003459 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003460 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003461 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003462 goto get_more_space;
3463 }
3464 if (*in == '<') {
3465 nbchar = in - ctxt->input->cur;
3466 if (nbchar > 0) {
3467 const xmlChar *tmp = ctxt->input->cur;
3468 ctxt->input->cur = in;
3469
Daniel Veillard34099b42004-11-04 17:34:35 +00003470 if ((ctxt->sax != NULL) &&
3471 (ctxt->sax->ignorableWhitespace !=
3472 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003473 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003474 if (ctxt->sax->ignorableWhitespace != NULL)
3475 ctxt->sax->ignorableWhitespace(ctxt->userData,
3476 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003477 } else if (ctxt->sax->characters != NULL)
3478 ctxt->sax->characters(ctxt->userData,
3479 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003480 } else if ((ctxt->sax != NULL) &&
3481 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003482 ctxt->sax->characters(ctxt->userData,
3483 tmp, nbchar);
3484 }
3485 }
3486 return;
3487 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003488
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003489get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003490 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003491 while (test_char_data[*in]) {
3492 in++;
3493 ccol++;
3494 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003495 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003496 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003497 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003498 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003499 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003500 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003501 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003502 }
3503 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003504 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003505 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003506 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003507 return;
3508 }
3509 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003510 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003511 goto get_more;
3512 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003513 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003514 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003515 if ((ctxt->sax != NULL) &&
3516 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003517 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003518 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003519 const xmlChar *tmp = ctxt->input->cur;
3520 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003521
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003522 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003523 if (ctxt->sax->ignorableWhitespace != NULL)
3524 ctxt->sax->ignorableWhitespace(ctxt->userData,
3525 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003526 } else if (ctxt->sax->characters != NULL)
3527 ctxt->sax->characters(ctxt->userData,
3528 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003529 line = ctxt->input->line;
3530 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003531 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003532 if (ctxt->sax->characters != NULL)
3533 ctxt->sax->characters(ctxt->userData,
3534 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003535 line = ctxt->input->line;
3536 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003537 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003538 }
3539 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003540 if (*in == 0xD) {
3541 in++;
3542 if (*in == 0xA) {
3543 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003544 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003545 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003546 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003547 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003548 in--;
3549 }
3550 if (*in == '<') {
3551 return;
3552 }
3553 if (*in == '&') {
3554 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003555 }
3556 SHRINK;
3557 GROW;
3558 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003559 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003560 nbchar = 0;
3561 }
Daniel Veillard50582112001-03-26 22:52:16 +00003562 ctxt->input->line = line;
3563 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003564 xmlParseCharDataComplex(ctxt, cdata);
3565}
3566
Daniel Veillard01c13b52002-12-10 15:19:08 +00003567/**
3568 * xmlParseCharDataComplex:
3569 * @ctxt: an XML parser context
3570 * @cdata: int indicating whether we are within a CDATA section
3571 *
3572 * parse a CharData section.this is the fallback function
3573 * of xmlParseCharData() when the parsing requires handling
3574 * of non-ASCII characters.
3575 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003576void
3577xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3579 int nbchar = 0;
3580 int cur, l;
3581 int count = 0;
3582
3583 SHRINK;
3584 GROW;
3585 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003586 while ((cur != '<') && /* checked */
3587 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003588 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 if ((cur == ']') && (NXT(1) == ']') &&
3590 (NXT(2) == '>')) {
3591 if (cdata) break;
3592 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003593 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003594 }
3595 }
3596 COPY_BUF(l,buf,nbchar,cur);
3597 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003598 buf[nbchar] = 0;
3599
Owen Taylor3473f882001-02-23 17:55:21 +00003600 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003601 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003602 */
3603 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003604 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003605 if (ctxt->sax->ignorableWhitespace != NULL)
3606 ctxt->sax->ignorableWhitespace(ctxt->userData,
3607 buf, nbchar);
3608 } else {
3609 if (ctxt->sax->characters != NULL)
3610 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3611 }
3612 }
3613 nbchar = 0;
3614 }
3615 count++;
3616 if (count > 50) {
3617 GROW;
3618 count = 0;
3619 }
3620 NEXTL(l);
3621 cur = CUR_CHAR(l);
3622 }
3623 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003624 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003626 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003627 */
3628 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003629 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (ctxt->sax->ignorableWhitespace != NULL)
3631 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3632 } else {
3633 if (ctxt->sax->characters != NULL)
3634 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3635 }
3636 }
3637 }
3638}
3639
3640/**
3641 * xmlParseExternalID:
3642 * @ctxt: an XML parser context
3643 * @publicID: a xmlChar** receiving PubidLiteral
3644 * @strict: indicate whether we should restrict parsing to only
3645 * production [75], see NOTE below
3646 *
3647 * Parse an External ID or a Public ID
3648 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003649 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003650 * 'PUBLIC' S PubidLiteral S SystemLiteral
3651 *
3652 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3653 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3654 *
3655 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3656 *
3657 * Returns the function returns SystemLiteral and in the second
3658 * case publicID receives PubidLiteral, is strict is off
3659 * it is possible to return NULL and have publicID set.
3660 */
3661
3662xmlChar *
3663xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3664 xmlChar *URI = NULL;
3665
3666 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003667
3668 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003669 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003670 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003671 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3673 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
3675 SKIP_BLANKS;
3676 URI = xmlParseSystemLiteral(ctxt);
3677 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003678 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003679 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003680 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003681 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003682 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003684 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 SKIP_BLANKS;
3687 *publicID = xmlParsePubidLiteral(ctxt);
3688 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003689 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003690 }
3691 if (strict) {
3692 /*
3693 * We don't handle [83] so "S SystemLiteral" is required.
3694 */
William M. Brack76e95df2003-10-18 16:20:14 +00003695 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003696 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003697 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 } else {
3700 /*
3701 * We handle [83] so we return immediately, if
3702 * "S SystemLiteral" is not detected. From a purely parsing
3703 * point of view that's a nice mess.
3704 */
3705 const xmlChar *ptr;
3706 GROW;
3707
3708 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003709 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003710
William M. Brack76e95df2003-10-18 16:20:14 +00003711 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003712 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3713 }
3714 SKIP_BLANKS;
3715 URI = xmlParseSystemLiteral(ctxt);
3716 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003717 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003718 }
3719 }
3720 return(URI);
3721}
3722
3723/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003724 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003725 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003726 * @buf: the already parsed part of the buffer
3727 * @len: number of bytes filles in the buffer
3728 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003729 *
3730 * Skip an XML (SGML) comment <!-- .... -->
3731 * The spec says that "For compatibility, the string "--" (double-hyphen)
3732 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003733 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003734 *
3735 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3736 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003737static void
3738xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003739 int q, ql;
3740 int r, rl;
3741 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003742 xmlParserInputPtr input = ctxt->input;
3743 int count = 0;
3744
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003746 len = 0;
3747 size = XML_PARSER_BUFFER_SIZE;
3748 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3749 if (buf == NULL) {
3750 xmlErrMemory(ctxt, NULL);
3751 return;
3752 }
Owen Taylor3473f882001-02-23 17:55:21 +00003753 }
3754 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003755 if (q == 0)
3756 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003757 NEXTL(ql);
3758 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003759 if (r == 0)
3760 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 NEXTL(rl);
3762 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003763 if (cur == 0)
3764 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003765 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003766 ((cur != '>') ||
3767 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003768 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003769 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003770 }
3771 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003772 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003773 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003774 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3775 if (new_buf == NULL) {
3776 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003777 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 return;
3779 }
William M. Bracka3215c72004-07-31 16:24:01 +00003780 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003781 }
3782 COPY_BUF(ql,buf,len,q);
3783 q = r;
3784 ql = rl;
3785 r = cur;
3786 rl = l;
3787
3788 count++;
3789 if (count > 50) {
3790 GROW;
3791 count = 0;
3792 }
3793 NEXTL(l);
3794 cur = CUR_CHAR(l);
3795 if (cur == 0) {
3796 SHRINK;
3797 GROW;
3798 cur = CUR_CHAR(l);
3799 }
3800 }
3801 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003802 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003803 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003804 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 xmlFree(buf);
3806 } else {
3807 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003808 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3809 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
3811 NEXT;
3812 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3813 (!ctxt->disableSAX))
3814 ctxt->sax->comment(ctxt->userData, buf);
3815 xmlFree(buf);
3816 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003817 return;
3818not_terminated:
3819 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3820 "Comment not terminated\n", NULL);
3821 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003822}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003823/**
3824 * xmlParseComment:
3825 * @ctxt: an XML parser context
3826 *
3827 * Skip an XML (SGML) comment <!-- .... -->
3828 * The spec says that "For compatibility, the string "--" (double-hyphen)
3829 * must not occur within comments. "
3830 *
3831 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3832 */
3833void
3834xmlParseComment(xmlParserCtxtPtr ctxt) {
3835 xmlChar *buf = NULL;
3836 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003837 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003838 xmlParserInputState state;
3839 const xmlChar *in;
3840 int nbchar = 0, ccol;
3841
3842 /*
3843 * Check that there is a comment right here.
3844 */
3845 if ((RAW != '<') || (NXT(1) != '!') ||
3846 (NXT(2) != '-') || (NXT(3) != '-')) return;
3847
3848 state = ctxt->instate;
3849 ctxt->instate = XML_PARSER_COMMENT;
3850 SKIP(4);
3851 SHRINK;
3852 GROW;
3853
3854 /*
3855 * Accelerated common case where input don't need to be
3856 * modified before passing it to the handler.
3857 */
3858 in = ctxt->input->cur;
3859 do {
3860 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003861 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003862 ctxt->input->line++; ctxt->input->col = 1;
3863 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003864 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003865 }
3866get_more:
3867 ccol = ctxt->input->col;
3868 while (((*in > '-') && (*in <= 0x7F)) ||
3869 ((*in >= 0x20) && (*in < '-')) ||
3870 (*in == 0x09)) {
3871 in++;
3872 ccol++;
3873 }
3874 ctxt->input->col = ccol;
3875 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003876 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003877 ctxt->input->line++; ctxt->input->col = 1;
3878 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003879 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003880 goto get_more;
3881 }
3882 nbchar = in - ctxt->input->cur;
3883 /*
3884 * save current set of data
3885 */
3886 if (nbchar > 0) {
3887 if ((ctxt->sax != NULL) &&
3888 (ctxt->sax->comment != NULL)) {
3889 if (buf == NULL) {
3890 if ((*in == '-') && (in[1] == '-'))
3891 size = nbchar + 1;
3892 else
3893 size = XML_PARSER_BUFFER_SIZE + nbchar;
3894 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3895 if (buf == NULL) {
3896 xmlErrMemory(ctxt, NULL);
3897 ctxt->instate = state;
3898 return;
3899 }
3900 len = 0;
3901 } else if (len + nbchar + 1 >= size) {
3902 xmlChar *new_buf;
3903 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3904 new_buf = (xmlChar *) xmlRealloc(buf,
3905 size * sizeof(xmlChar));
3906 if (new_buf == NULL) {
3907 xmlFree (buf);
3908 xmlErrMemory(ctxt, NULL);
3909 ctxt->instate = state;
3910 return;
3911 }
3912 buf = new_buf;
3913 }
3914 memcpy(&buf[len], ctxt->input->cur, nbchar);
3915 len += nbchar;
3916 buf[len] = 0;
3917 }
3918 }
3919 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003920 if (*in == 0xA) {
3921 in++;
3922 ctxt->input->line++; ctxt->input->col = 1;
3923 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003924 if (*in == 0xD) {
3925 in++;
3926 if (*in == 0xA) {
3927 ctxt->input->cur = in;
3928 in++;
3929 ctxt->input->line++; ctxt->input->col = 1;
3930 continue; /* while */
3931 }
3932 in--;
3933 }
3934 SHRINK;
3935 GROW;
3936 in = ctxt->input->cur;
3937 if (*in == '-') {
3938 if (in[1] == '-') {
3939 if (in[2] == '>') {
3940 SKIP(3);
3941 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3942 (!ctxt->disableSAX)) {
3943 if (buf != NULL)
3944 ctxt->sax->comment(ctxt->userData, buf);
3945 else
3946 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3947 }
3948 if (buf != NULL)
3949 xmlFree(buf);
3950 ctxt->instate = state;
3951 return;
3952 }
3953 if (buf != NULL)
3954 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3955 "Comment not terminated \n<!--%.50s\n",
3956 buf);
3957 else
3958 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3959 "Comment not terminated \n", NULL);
3960 in++;
3961 ctxt->input->col++;
3962 }
3963 in++;
3964 ctxt->input->col++;
3965 goto get_more;
3966 }
3967 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3968 xmlParseCommentComplex(ctxt, buf, len, size);
3969 ctxt->instate = state;
3970 return;
3971}
3972
Owen Taylor3473f882001-02-23 17:55:21 +00003973
3974/**
3975 * xmlParsePITarget:
3976 * @ctxt: an XML parser context
3977 *
3978 * parse the name of a PI
3979 *
3980 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3981 *
3982 * Returns the PITarget name or NULL
3983 */
3984
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003985const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003986xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003987 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003988
3989 name = xmlParseName(ctxt);
3990 if ((name != NULL) &&
3991 ((name[0] == 'x') || (name[0] == 'X')) &&
3992 ((name[1] == 'm') || (name[1] == 'M')) &&
3993 ((name[2] == 'l') || (name[2] == 'L'))) {
3994 int i;
3995 if ((name[0] == 'x') && (name[1] == 'm') &&
3996 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003997 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003998 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003999 return(name);
4000 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004001 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 return(name);
4003 }
4004 for (i = 0;;i++) {
4005 if (xmlW3CPIs[i] == NULL) break;
4006 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4007 return(name);
4008 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004009 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4010 "xmlParsePITarget: invalid name prefix 'xml'\n",
4011 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004012 }
4013 return(name);
4014}
4015
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004016#ifdef LIBXML_CATALOG_ENABLED
4017/**
4018 * xmlParseCatalogPI:
4019 * @ctxt: an XML parser context
4020 * @catalog: the PI value string
4021 *
4022 * parse an XML Catalog Processing Instruction.
4023 *
4024 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4025 *
4026 * Occurs only if allowed by the user and if happening in the Misc
4027 * part of the document before any doctype informations
4028 * This will add the given catalog to the parsing context in order
4029 * to be used if there is a resolution need further down in the document
4030 */
4031
4032static void
4033xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4034 xmlChar *URL = NULL;
4035 const xmlChar *tmp, *base;
4036 xmlChar marker;
4037
4038 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004039 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004040 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4041 goto error;
4042 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004043 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004044 if (*tmp != '=') {
4045 return;
4046 }
4047 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004048 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004049 marker = *tmp;
4050 if ((marker != '\'') && (marker != '"'))
4051 goto error;
4052 tmp++;
4053 base = tmp;
4054 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4055 if (*tmp == 0)
4056 goto error;
4057 URL = xmlStrndup(base, tmp - base);
4058 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004059 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004060 if (*tmp != 0)
4061 goto error;
4062
4063 if (URL != NULL) {
4064 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4065 xmlFree(URL);
4066 }
4067 return;
4068
4069error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004070 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4071 "Catalog PI syntax error: %s\n",
4072 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004073 if (URL != NULL)
4074 xmlFree(URL);
4075}
4076#endif
4077
Owen Taylor3473f882001-02-23 17:55:21 +00004078/**
4079 * xmlParsePI:
4080 * @ctxt: an XML parser context
4081 *
4082 * parse an XML Processing Instruction.
4083 *
4084 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4085 *
4086 * The processing is transfered to SAX once parsed.
4087 */
4088
4089void
4090xmlParsePI(xmlParserCtxtPtr ctxt) {
4091 xmlChar *buf = NULL;
4092 int len = 0;
4093 int size = XML_PARSER_BUFFER_SIZE;
4094 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004095 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004096 xmlParserInputState state;
4097 int count = 0;
4098
4099 if ((RAW == '<') && (NXT(1) == '?')) {
4100 xmlParserInputPtr input = ctxt->input;
4101 state = ctxt->instate;
4102 ctxt->instate = XML_PARSER_PI;
4103 /*
4104 * this is a Processing Instruction.
4105 */
4106 SKIP(2);
4107 SHRINK;
4108
4109 /*
4110 * Parse the target name and check for special support like
4111 * namespace.
4112 */
4113 target = xmlParsePITarget(ctxt);
4114 if (target != NULL) {
4115 if ((RAW == '?') && (NXT(1) == '>')) {
4116 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004117 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4118 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004119 }
4120 SKIP(2);
4121
4122 /*
4123 * SAX: PI detected.
4124 */
4125 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4126 (ctxt->sax->processingInstruction != NULL))
4127 ctxt->sax->processingInstruction(ctxt->userData,
4128 target, NULL);
4129 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 return;
4131 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004132 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004133 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004134 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004135 ctxt->instate = state;
4136 return;
4137 }
4138 cur = CUR;
4139 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004140 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4141 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 }
4143 SKIP_BLANKS;
4144 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004145 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004146 ((cur != '?') || (NXT(1) != '>'))) {
4147 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004148 xmlChar *tmp;
4149
Owen Taylor3473f882001-02-23 17:55:21 +00004150 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004151 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4152 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004153 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004154 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004155 ctxt->instate = state;
4156 return;
4157 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004158 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004159 }
4160 count++;
4161 if (count > 50) {
4162 GROW;
4163 count = 0;
4164 }
4165 COPY_BUF(l,buf,len,cur);
4166 NEXTL(l);
4167 cur = CUR_CHAR(l);
4168 if (cur == 0) {
4169 SHRINK;
4170 GROW;
4171 cur = CUR_CHAR(l);
4172 }
4173 }
4174 buf[len] = 0;
4175 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004176 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4177 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004178 } else {
4179 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004180 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4181 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004182 }
4183 SKIP(2);
4184
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004185#ifdef LIBXML_CATALOG_ENABLED
4186 if (((state == XML_PARSER_MISC) ||
4187 (state == XML_PARSER_START)) &&
4188 (xmlStrEqual(target, XML_CATALOG_PI))) {
4189 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4190 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4191 (allow == XML_CATA_ALLOW_ALL))
4192 xmlParseCatalogPI(ctxt, buf);
4193 }
4194#endif
4195
4196
Owen Taylor3473f882001-02-23 17:55:21 +00004197 /*
4198 * SAX: PI detected.
4199 */
4200 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4201 (ctxt->sax->processingInstruction != NULL))
4202 ctxt->sax->processingInstruction(ctxt->userData,
4203 target, buf);
4204 }
4205 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004206 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004207 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004208 }
4209 ctxt->instate = state;
4210 }
4211}
4212
4213/**
4214 * xmlParseNotationDecl:
4215 * @ctxt: an XML parser context
4216 *
4217 * parse a notation declaration
4218 *
4219 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4220 *
4221 * Hence there is actually 3 choices:
4222 * 'PUBLIC' S PubidLiteral
4223 * 'PUBLIC' S PubidLiteral S SystemLiteral
4224 * and 'SYSTEM' S SystemLiteral
4225 *
4226 * See the NOTE on xmlParseExternalID().
4227 */
4228
4229void
4230xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004231 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004232 xmlChar *Pubid;
4233 xmlChar *Systemid;
4234
Daniel Veillarda07050d2003-10-19 14:46:32 +00004235 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004236 xmlParserInputPtr input = ctxt->input;
4237 SHRINK;
4238 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004239 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004240 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4241 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004242 return;
4243 }
4244 SKIP_BLANKS;
4245
Daniel Veillard76d66f42001-05-16 21:05:17 +00004246 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004248 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004249 return;
4250 }
William M. Brack76e95df2003-10-18 16:20:14 +00004251 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004253 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 return;
4255 }
4256 SKIP_BLANKS;
4257
4258 /*
4259 * Parse the IDs.
4260 */
4261 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4262 SKIP_BLANKS;
4263
4264 if (RAW == '>') {
4265 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004266 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4267 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 NEXT;
4270 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4271 (ctxt->sax->notationDecl != NULL))
4272 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4273 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004274 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004275 }
Owen Taylor3473f882001-02-23 17:55:21 +00004276 if (Systemid != NULL) xmlFree(Systemid);
4277 if (Pubid != NULL) xmlFree(Pubid);
4278 }
4279}
4280
4281/**
4282 * xmlParseEntityDecl:
4283 * @ctxt: an XML parser context
4284 *
4285 * parse <!ENTITY declarations
4286 *
4287 * [70] EntityDecl ::= GEDecl | PEDecl
4288 *
4289 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4290 *
4291 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4292 *
4293 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4294 *
4295 * [74] PEDef ::= EntityValue | ExternalID
4296 *
4297 * [76] NDataDecl ::= S 'NDATA' S Name
4298 *
4299 * [ VC: Notation Declared ]
4300 * The Name must match the declared name of a notation.
4301 */
4302
4303void
4304xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004305 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004306 xmlChar *value = NULL;
4307 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004308 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004309 int isParameter = 0;
4310 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004311 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004312
Daniel Veillard4c778d82005-01-23 17:37:44 +00004313 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004314 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004315 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 SHRINK;
4317 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004318 skipped = SKIP_BLANKS;
4319 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4321 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004322 }
Owen Taylor3473f882001-02-23 17:55:21 +00004323
4324 if (RAW == '%') {
4325 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004326 skipped = SKIP_BLANKS;
4327 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004328 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4329 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
Owen Taylor3473f882001-02-23 17:55:21 +00004331 isParameter = 1;
4332 }
4333
Daniel Veillard76d66f42001-05-16 21:05:17 +00004334 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004335 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004336 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4337 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004338 return;
4339 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004340 skipped = SKIP_BLANKS;
4341 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004342 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4343 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004344 }
Owen Taylor3473f882001-02-23 17:55:21 +00004345
Daniel Veillardf5582f12002-06-11 10:08:16 +00004346 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004347 /*
4348 * handle the various case of definitions...
4349 */
4350 if (isParameter) {
4351 if ((RAW == '"') || (RAW == '\'')) {
4352 value = xmlParseEntityValue(ctxt, &orig);
4353 if (value) {
4354 if ((ctxt->sax != NULL) &&
4355 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4356 ctxt->sax->entityDecl(ctxt->userData, name,
4357 XML_INTERNAL_PARAMETER_ENTITY,
4358 NULL, NULL, value);
4359 }
4360 } else {
4361 URI = xmlParseExternalID(ctxt, &literal, 1);
4362 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004363 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 if (URI) {
4366 xmlURIPtr uri;
4367
4368 uri = xmlParseURI((const char *) URI);
4369 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004370 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4371 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004372 /*
4373 * This really ought to be a well formedness error
4374 * but the XML Core WG decided otherwise c.f. issue
4375 * E26 of the XML erratas.
4376 */
Owen Taylor3473f882001-02-23 17:55:21 +00004377 } else {
4378 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004379 /*
4380 * Okay this is foolish to block those but not
4381 * invalid URIs.
4382 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004383 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004384 } else {
4385 if ((ctxt->sax != NULL) &&
4386 (!ctxt->disableSAX) &&
4387 (ctxt->sax->entityDecl != NULL))
4388 ctxt->sax->entityDecl(ctxt->userData, name,
4389 XML_EXTERNAL_PARAMETER_ENTITY,
4390 literal, URI, NULL);
4391 }
4392 xmlFreeURI(uri);
4393 }
4394 }
4395 }
4396 } else {
4397 if ((RAW == '"') || (RAW == '\'')) {
4398 value = xmlParseEntityValue(ctxt, &orig);
4399 if ((ctxt->sax != NULL) &&
4400 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4401 ctxt->sax->entityDecl(ctxt->userData, name,
4402 XML_INTERNAL_GENERAL_ENTITY,
4403 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004404 /*
4405 * For expat compatibility in SAX mode.
4406 */
4407 if ((ctxt->myDoc == NULL) ||
4408 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4409 if (ctxt->myDoc == NULL) {
4410 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4411 }
4412 if (ctxt->myDoc->intSubset == NULL)
4413 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4414 BAD_CAST "fake", NULL, NULL);
4415
Daniel Veillard1af9a412003-08-20 22:54:39 +00004416 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4417 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004418 }
Owen Taylor3473f882001-02-23 17:55:21 +00004419 } else {
4420 URI = xmlParseExternalID(ctxt, &literal, 1);
4421 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004422 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004423 }
4424 if (URI) {
4425 xmlURIPtr uri;
4426
4427 uri = xmlParseURI((const char *)URI);
4428 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004429 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4430 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004431 /*
4432 * This really ought to be a well formedness error
4433 * but the XML Core WG decided otherwise c.f. issue
4434 * E26 of the XML erratas.
4435 */
Owen Taylor3473f882001-02-23 17:55:21 +00004436 } else {
4437 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004438 /*
4439 * Okay this is foolish to block those but not
4440 * invalid URIs.
4441 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004442 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 xmlFreeURI(uri);
4445 }
4446 }
William M. Brack76e95df2003-10-18 16:20:14 +00004447 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4449 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004450 }
4451 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004452 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004453 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4456 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004457 }
4458 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004459 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004460 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4461 (ctxt->sax->unparsedEntityDecl != NULL))
4462 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4463 literal, URI, ndata);
4464 } else {
4465 if ((ctxt->sax != NULL) &&
4466 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4467 ctxt->sax->entityDecl(ctxt->userData, name,
4468 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4469 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004470 /*
4471 * For expat compatibility in SAX mode.
4472 * assuming the entity repalcement was asked for
4473 */
4474 if ((ctxt->replaceEntities != 0) &&
4475 ((ctxt->myDoc == NULL) ||
4476 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4477 if (ctxt->myDoc == NULL) {
4478 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4479 }
4480
4481 if (ctxt->myDoc->intSubset == NULL)
4482 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4483 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004484 xmlSAX2EntityDecl(ctxt, name,
4485 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4486 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004487 }
Owen Taylor3473f882001-02-23 17:55:21 +00004488 }
4489 }
4490 }
4491 SKIP_BLANKS;
4492 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004493 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004494 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004495 } else {
4496 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004497 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4498 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004499 }
4500 NEXT;
4501 }
4502 if (orig != NULL) {
4503 /*
4504 * Ugly mechanism to save the raw entity value.
4505 */
4506 xmlEntityPtr cur = NULL;
4507
4508 if (isParameter) {
4509 if ((ctxt->sax != NULL) &&
4510 (ctxt->sax->getParameterEntity != NULL))
4511 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4512 } else {
4513 if ((ctxt->sax != NULL) &&
4514 (ctxt->sax->getEntity != NULL))
4515 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004516 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004517 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004518 }
Owen Taylor3473f882001-02-23 17:55:21 +00004519 }
4520 if (cur != NULL) {
4521 if (cur->orig != NULL)
4522 xmlFree(orig);
4523 else
4524 cur->orig = orig;
4525 } else
4526 xmlFree(orig);
4527 }
Owen Taylor3473f882001-02-23 17:55:21 +00004528 if (value != NULL) xmlFree(value);
4529 if (URI != NULL) xmlFree(URI);
4530 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004531 }
4532}
4533
4534/**
4535 * xmlParseDefaultDecl:
4536 * @ctxt: an XML parser context
4537 * @value: Receive a possible fixed default value for the attribute
4538 *
4539 * Parse an attribute default declaration
4540 *
4541 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4542 *
4543 * [ VC: Required Attribute ]
4544 * if the default declaration is the keyword #REQUIRED, then the
4545 * attribute must be specified for all elements of the type in the
4546 * attribute-list declaration.
4547 *
4548 * [ VC: Attribute Default Legal ]
4549 * The declared default value must meet the lexical constraints of
4550 * the declared attribute type c.f. xmlValidateAttributeDecl()
4551 *
4552 * [ VC: Fixed Attribute Default ]
4553 * if an attribute has a default value declared with the #FIXED
4554 * keyword, instances of that attribute must match the default value.
4555 *
4556 * [ WFC: No < in Attribute Values ]
4557 * handled in xmlParseAttValue()
4558 *
4559 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4560 * or XML_ATTRIBUTE_FIXED.
4561 */
4562
4563int
4564xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4565 int val;
4566 xmlChar *ret;
4567
4568 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004569 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004570 SKIP(9);
4571 return(XML_ATTRIBUTE_REQUIRED);
4572 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004573 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004574 SKIP(8);
4575 return(XML_ATTRIBUTE_IMPLIED);
4576 }
4577 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004578 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004579 SKIP(6);
4580 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004581 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004582 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4583 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004584 }
4585 SKIP_BLANKS;
4586 }
4587 ret = xmlParseAttValue(ctxt);
4588 ctxt->instate = XML_PARSER_DTD;
4589 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004590 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004591 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004592 } else
4593 *value = ret;
4594 return(val);
4595}
4596
4597/**
4598 * xmlParseNotationType:
4599 * @ctxt: an XML parser context
4600 *
4601 * parse an Notation attribute type.
4602 *
4603 * Note: the leading 'NOTATION' S part has already being parsed...
4604 *
4605 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4606 *
4607 * [ VC: Notation Attributes ]
4608 * Values of this type must match one of the notation names included
4609 * in the declaration; all notation names in the declaration must be declared.
4610 *
4611 * Returns: the notation attribute tree built while parsing
4612 */
4613
4614xmlEnumerationPtr
4615xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004616 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004617 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4618
4619 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004620 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004621 return(NULL);
4622 }
4623 SHRINK;
4624 do {
4625 NEXT;
4626 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004627 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004628 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004629 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4630 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004631 return(ret);
4632 }
4633 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004634 if (cur == NULL) return(ret);
4635 if (last == NULL) ret = last = cur;
4636 else {
4637 last->next = cur;
4638 last = cur;
4639 }
4640 SKIP_BLANKS;
4641 } while (RAW == '|');
4642 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004643 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004644 if ((last != NULL) && (last != ret))
4645 xmlFreeEnumeration(last);
4646 return(ret);
4647 }
4648 NEXT;
4649 return(ret);
4650}
4651
4652/**
4653 * xmlParseEnumerationType:
4654 * @ctxt: an XML parser context
4655 *
4656 * parse an Enumeration attribute type.
4657 *
4658 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4659 *
4660 * [ VC: Enumeration ]
4661 * Values of this type must match one of the Nmtoken tokens in
4662 * the declaration
4663 *
4664 * Returns: the enumeration attribute tree built while parsing
4665 */
4666
4667xmlEnumerationPtr
4668xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4669 xmlChar *name;
4670 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4671
4672 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004673 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004674 return(NULL);
4675 }
4676 SHRINK;
4677 do {
4678 NEXT;
4679 SKIP_BLANKS;
4680 name = xmlParseNmtoken(ctxt);
4681 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004682 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 return(ret);
4684 }
4685 cur = xmlCreateEnumeration(name);
4686 xmlFree(name);
4687 if (cur == NULL) return(ret);
4688 if (last == NULL) ret = last = cur;
4689 else {
4690 last->next = cur;
4691 last = cur;
4692 }
4693 SKIP_BLANKS;
4694 } while (RAW == '|');
4695 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004696 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 return(ret);
4698 }
4699 NEXT;
4700 return(ret);
4701}
4702
4703/**
4704 * xmlParseEnumeratedType:
4705 * @ctxt: an XML parser context
4706 * @tree: the enumeration tree built while parsing
4707 *
4708 * parse an Enumerated attribute type.
4709 *
4710 * [57] EnumeratedType ::= NotationType | Enumeration
4711 *
4712 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4713 *
4714 *
4715 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4716 */
4717
4718int
4719xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004720 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004721 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004722 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004723 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4724 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004725 return(0);
4726 }
4727 SKIP_BLANKS;
4728 *tree = xmlParseNotationType(ctxt);
4729 if (*tree == NULL) return(0);
4730 return(XML_ATTRIBUTE_NOTATION);
4731 }
4732 *tree = xmlParseEnumerationType(ctxt);
4733 if (*tree == NULL) return(0);
4734 return(XML_ATTRIBUTE_ENUMERATION);
4735}
4736
4737/**
4738 * xmlParseAttributeType:
4739 * @ctxt: an XML parser context
4740 * @tree: the enumeration tree built while parsing
4741 *
4742 * parse the Attribute list def for an element
4743 *
4744 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4745 *
4746 * [55] StringType ::= 'CDATA'
4747 *
4748 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4749 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4750 *
4751 * Validity constraints for attribute values syntax are checked in
4752 * xmlValidateAttributeValue()
4753 *
4754 * [ VC: ID ]
4755 * Values of type ID must match the Name production. A name must not
4756 * appear more than once in an XML document as a value of this type;
4757 * i.e., ID values must uniquely identify the elements which bear them.
4758 *
4759 * [ VC: One ID per Element Type ]
4760 * No element type may have more than one ID attribute specified.
4761 *
4762 * [ VC: ID Attribute Default ]
4763 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4764 *
4765 * [ VC: IDREF ]
4766 * Values of type IDREF must match the Name production, and values
4767 * of type IDREFS must match Names; each IDREF Name must match the value
4768 * of an ID attribute on some element in the XML document; i.e. IDREF
4769 * values must match the value of some ID attribute.
4770 *
4771 * [ VC: Entity Name ]
4772 * Values of type ENTITY must match the Name production, values
4773 * of type ENTITIES must match Names; each Entity Name must match the
4774 * name of an unparsed entity declared in the DTD.
4775 *
4776 * [ VC: Name Token ]
4777 * Values of type NMTOKEN must match the Nmtoken production; values
4778 * of type NMTOKENS must match Nmtokens.
4779 *
4780 * Returns the attribute type
4781 */
4782int
4783xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4784 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004785 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004786 SKIP(5);
4787 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004788 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004789 SKIP(6);
4790 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004791 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004792 SKIP(5);
4793 return(XML_ATTRIBUTE_IDREF);
4794 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4795 SKIP(2);
4796 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004797 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004798 SKIP(6);
4799 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004800 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004801 SKIP(8);
4802 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004803 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004804 SKIP(8);
4805 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004806 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004807 SKIP(7);
4808 return(XML_ATTRIBUTE_NMTOKEN);
4809 }
4810 return(xmlParseEnumeratedType(ctxt, tree));
4811}
4812
4813/**
4814 * xmlParseAttributeListDecl:
4815 * @ctxt: an XML parser context
4816 *
4817 * : parse the Attribute list def for an element
4818 *
4819 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4820 *
4821 * [53] AttDef ::= S Name S AttType S DefaultDecl
4822 *
4823 */
4824void
4825xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004826 const xmlChar *elemName;
4827 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004828 xmlEnumerationPtr tree;
4829
Daniel Veillarda07050d2003-10-19 14:46:32 +00004830 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004831 xmlParserInputPtr input = ctxt->input;
4832
4833 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004834 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004835 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004836 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004837 }
4838 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004839 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004840 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004841 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4842 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004843 return;
4844 }
4845 SKIP_BLANKS;
4846 GROW;
4847 while (RAW != '>') {
4848 const xmlChar *check = CUR_PTR;
4849 int type;
4850 int def;
4851 xmlChar *defaultValue = NULL;
4852
4853 GROW;
4854 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004855 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004857 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4858 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004859 break;
4860 }
4861 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004862 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004863 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004864 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004865 if (defaultValue != NULL)
4866 xmlFree(defaultValue);
4867 break;
4868 }
4869 SKIP_BLANKS;
4870
4871 type = xmlParseAttributeType(ctxt, &tree);
4872 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004873 if (defaultValue != NULL)
4874 xmlFree(defaultValue);
4875 break;
4876 }
4877
4878 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004879 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004880 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4881 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004882 if (defaultValue != NULL)
4883 xmlFree(defaultValue);
4884 if (tree != NULL)
4885 xmlFreeEnumeration(tree);
4886 break;
4887 }
4888 SKIP_BLANKS;
4889
4890 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4891 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004892 if (defaultValue != NULL)
4893 xmlFree(defaultValue);
4894 if (tree != NULL)
4895 xmlFreeEnumeration(tree);
4896 break;
4897 }
4898
4899 GROW;
4900 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004901 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004902 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004903 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004904 if (defaultValue != NULL)
4905 xmlFree(defaultValue);
4906 if (tree != NULL)
4907 xmlFreeEnumeration(tree);
4908 break;
4909 }
4910 SKIP_BLANKS;
4911 }
4912 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004913 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4914 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004915 if (defaultValue != NULL)
4916 xmlFree(defaultValue);
4917 if (tree != NULL)
4918 xmlFreeEnumeration(tree);
4919 break;
4920 }
4921 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4922 (ctxt->sax->attributeDecl != NULL))
4923 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4924 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004925 else if (tree != NULL)
4926 xmlFreeEnumeration(tree);
4927
4928 if ((ctxt->sax2) && (defaultValue != NULL) &&
4929 (def != XML_ATTRIBUTE_IMPLIED) &&
4930 (def != XML_ATTRIBUTE_REQUIRED)) {
4931 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4932 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004933 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4934 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4935 }
Owen Taylor3473f882001-02-23 17:55:21 +00004936 if (defaultValue != NULL)
4937 xmlFree(defaultValue);
4938 GROW;
4939 }
4940 if (RAW == '>') {
4941 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004942 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4943 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004944 }
4945 NEXT;
4946 }
Owen Taylor3473f882001-02-23 17:55:21 +00004947 }
4948}
4949
4950/**
4951 * xmlParseElementMixedContentDecl:
4952 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004953 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004954 *
4955 * parse the declaration for a Mixed Element content
4956 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4957 *
4958 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4959 * '(' S? '#PCDATA' S? ')'
4960 *
4961 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4962 *
4963 * [ VC: No Duplicate Types ]
4964 * The same name must not appear more than once in a single
4965 * mixed-content declaration.
4966 *
4967 * returns: the list of the xmlElementContentPtr describing the element choices
4968 */
4969xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004970xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004971 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004972 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004973
4974 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004975 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004976 SKIP(7);
4977 SKIP_BLANKS;
4978 SHRINK;
4979 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004980 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004981 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4982"Element content declaration doesn't start and stop in the same entity\n",
4983 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004984 }
Owen Taylor3473f882001-02-23 17:55:21 +00004985 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004986 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004987 if (RAW == '*') {
4988 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4989 NEXT;
4990 }
4991 return(ret);
4992 }
4993 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004994 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 if (ret == NULL) return(NULL);
4996 }
4997 while (RAW == '|') {
4998 NEXT;
4999 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005000 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005001 if (ret == NULL) return(NULL);
5002 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005003 if (cur != NULL)
5004 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005005 cur = ret;
5006 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005007 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005008 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005009 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005010 if (n->c1 != NULL)
5011 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005012 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005013 if (n != NULL)
5014 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005015 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005016 }
5017 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005018 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005019 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005020 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005021 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005022 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005023 return(NULL);
5024 }
5025 SKIP_BLANKS;
5026 GROW;
5027 }
5028 if ((RAW == ')') && (NXT(1) == '*')) {
5029 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005030 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005031 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005032 if (cur->c2 != NULL)
5033 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005034 }
5035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005036 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005037 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5038"Element content declaration doesn't start and stop in the same entity\n",
5039 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005040 }
Owen Taylor3473f882001-02-23 17:55:21 +00005041 SKIP(2);
5042 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005043 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005044 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005045 return(NULL);
5046 }
5047
5048 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005049 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 }
5051 return(ret);
5052}
5053
5054/**
5055 * xmlParseElementChildrenContentDecl:
5056 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005057 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005058 *
5059 * parse the declaration for a Mixed Element content
5060 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5061 *
5062 *
5063 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5064 *
5065 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5066 *
5067 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5068 *
5069 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5070 *
5071 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5072 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005073 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005074 * opening or closing parentheses in a choice, seq, or Mixed
5075 * construct is contained in the replacement text for a parameter
5076 * entity, both must be contained in the same replacement text. For
5077 * interoperability, if a parameter-entity reference appears in a
5078 * choice, seq, or Mixed construct, its replacement text should not
5079 * be empty, and neither the first nor last non-blank character of
5080 * the replacement text should be a connector (| or ,).
5081 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005082 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005083 * hierarchy.
5084 */
5085xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005086xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005087 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005088 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005089 xmlChar type = 0;
5090
5091 SKIP_BLANKS;
5092 GROW;
5093 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005094 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005095
Owen Taylor3473f882001-02-23 17:55:21 +00005096 /* Recurse on first child */
5097 NEXT;
5098 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005099 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005100 SKIP_BLANKS;
5101 GROW;
5102 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005103 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005104 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005105 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005106 return(NULL);
5107 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005108 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005109 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005110 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005111 return(NULL);
5112 }
Owen Taylor3473f882001-02-23 17:55:21 +00005113 GROW;
5114 if (RAW == '?') {
5115 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5116 NEXT;
5117 } else if (RAW == '*') {
5118 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5119 NEXT;
5120 } else if (RAW == '+') {
5121 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5122 NEXT;
5123 } else {
5124 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5125 }
Owen Taylor3473f882001-02-23 17:55:21 +00005126 GROW;
5127 }
5128 SKIP_BLANKS;
5129 SHRINK;
5130 while (RAW != ')') {
5131 /*
5132 * Each loop we parse one separator and one element.
5133 */
5134 if (RAW == ',') {
5135 if (type == 0) type = CUR;
5136
5137 /*
5138 * Detect "Name | Name , Name" error
5139 */
5140 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005141 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005142 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005144 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005145 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005146 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005147 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005148 return(NULL);
5149 }
5150 NEXT;
5151
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005152 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005153 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005154 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005155 xmlFreeDocElementContent(ctxt->myDoc, last);
5156 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005157 return(NULL);
5158 }
5159 if (last == NULL) {
5160 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005161 if (ret != NULL)
5162 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005163 ret = cur = op;
5164 } else {
5165 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005166 if (op != NULL)
5167 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005169 if (last != NULL)
5170 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005171 cur =op;
5172 last = NULL;
5173 }
5174 } else if (RAW == '|') {
5175 if (type == 0) type = CUR;
5176
5177 /*
5178 * Detect "Name , Name | Name" error
5179 */
5180 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005181 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005182 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005183 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005184 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005185 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005186 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005187 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005188 return(NULL);
5189 }
5190 NEXT;
5191
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005192 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005193 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005194 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005195 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005197 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005198 return(NULL);
5199 }
5200 if (last == NULL) {
5201 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005202 if (ret != NULL)
5203 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 ret = cur = op;
5205 } else {
5206 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005207 if (op != NULL)
5208 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005209 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005210 if (last != NULL)
5211 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005212 cur =op;
5213 last = NULL;
5214 }
5215 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005216 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005218 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 return(NULL);
5220 }
5221 GROW;
5222 SKIP_BLANKS;
5223 GROW;
5224 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005225 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005226 /* Recurse on second child */
5227 NEXT;
5228 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005229 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005230 SKIP_BLANKS;
5231 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005232 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005233 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005234 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005235 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005236 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005237 return(NULL);
5238 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005239 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 if (RAW == '?') {
5241 last->ocur = XML_ELEMENT_CONTENT_OPT;
5242 NEXT;
5243 } else if (RAW == '*') {
5244 last->ocur = XML_ELEMENT_CONTENT_MULT;
5245 NEXT;
5246 } else if (RAW == '+') {
5247 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5248 NEXT;
5249 } else {
5250 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5251 }
5252 }
5253 SKIP_BLANKS;
5254 GROW;
5255 }
5256 if ((cur != NULL) && (last != NULL)) {
5257 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005258 if (last != NULL)
5259 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005260 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005261 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005262 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5263"Element content declaration doesn't start and stop in the same entity\n",
5264 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005265 }
Owen Taylor3473f882001-02-23 17:55:21 +00005266 NEXT;
5267 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005268 if (ret != NULL) {
5269 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5270 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5271 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5272 else
5273 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5274 }
Owen Taylor3473f882001-02-23 17:55:21 +00005275 NEXT;
5276 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005277 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005278 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005279 cur = ret;
5280 /*
5281 * Some normalization:
5282 * (a | b* | c?)* == (a | b | c)*
5283 */
5284 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5285 if ((cur->c1 != NULL) &&
5286 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5287 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5288 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5289 if ((cur->c2 != NULL) &&
5290 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5291 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5292 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5293 cur = cur->c2;
5294 }
5295 }
Owen Taylor3473f882001-02-23 17:55:21 +00005296 NEXT;
5297 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005298 if (ret != NULL) {
5299 int found = 0;
5300
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005301 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5302 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5303 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005304 else
5305 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005306 /*
5307 * Some normalization:
5308 * (a | b*)+ == (a | b)*
5309 * (a | b?)+ == (a | b)*
5310 */
5311 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5312 if ((cur->c1 != NULL) &&
5313 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5314 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5315 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5316 found = 1;
5317 }
5318 if ((cur->c2 != NULL) &&
5319 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5320 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5321 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5322 found = 1;
5323 }
5324 cur = cur->c2;
5325 }
5326 if (found)
5327 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5328 }
Owen Taylor3473f882001-02-23 17:55:21 +00005329 NEXT;
5330 }
5331 return(ret);
5332}
5333
5334/**
5335 * xmlParseElementContentDecl:
5336 * @ctxt: an XML parser context
5337 * @name: the name of the element being defined.
5338 * @result: the Element Content pointer will be stored here if any
5339 *
5340 * parse the declaration for an Element content either Mixed or Children,
5341 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5342 *
5343 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5344 *
5345 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5346 */
5347
5348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005349xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005350 xmlElementContentPtr *result) {
5351
5352 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005353 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005354 int res;
5355
5356 *result = NULL;
5357
5358 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005359 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005360 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005361 return(-1);
5362 }
5363 NEXT;
5364 GROW;
5365 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005366 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005368 res = XML_ELEMENT_TYPE_MIXED;
5369 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005370 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005371 res = XML_ELEMENT_TYPE_ELEMENT;
5372 }
Owen Taylor3473f882001-02-23 17:55:21 +00005373 SKIP_BLANKS;
5374 *result = tree;
5375 return(res);
5376}
5377
5378/**
5379 * xmlParseElementDecl:
5380 * @ctxt: an XML parser context
5381 *
5382 * parse an Element declaration.
5383 *
5384 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5385 *
5386 * [ VC: Unique Element Type Declaration ]
5387 * No element type may be declared more than once
5388 *
5389 * Returns the type of the element, or -1 in case of error
5390 */
5391int
5392xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005393 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005394 int ret = -1;
5395 xmlElementContentPtr content = NULL;
5396
Daniel Veillard4c778d82005-01-23 17:37:44 +00005397 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005398 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005399 xmlParserInputPtr input = ctxt->input;
5400
5401 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005402 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005403 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5404 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005405 }
5406 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005407 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005408 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005409 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5410 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005411 return(-1);
5412 }
5413 while ((RAW == 0) && (ctxt->inputNr > 1))
5414 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005415 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5417 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005418 }
5419 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005420 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005421 SKIP(5);
5422 /*
5423 * Element must always be empty.
5424 */
5425 ret = XML_ELEMENT_TYPE_EMPTY;
5426 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5427 (NXT(2) == 'Y')) {
5428 SKIP(3);
5429 /*
5430 * Element is a generic container.
5431 */
5432 ret = XML_ELEMENT_TYPE_ANY;
5433 } else if (RAW == '(') {
5434 ret = xmlParseElementContentDecl(ctxt, name, &content);
5435 } else {
5436 /*
5437 * [ WFC: PEs in Internal Subset ] error handling.
5438 */
5439 if ((RAW == '%') && (ctxt->external == 0) &&
5440 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005441 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005442 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005443 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005444 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005445 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5446 }
Owen Taylor3473f882001-02-23 17:55:21 +00005447 return(-1);
5448 }
5449
5450 SKIP_BLANKS;
5451 /*
5452 * Pop-up of finished entities.
5453 */
5454 while ((RAW == 0) && (ctxt->inputNr > 1))
5455 xmlPopInput(ctxt);
5456 SKIP_BLANKS;
5457
5458 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005459 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005460 if (content != NULL) {
5461 xmlFreeDocElementContent(ctxt->myDoc, content);
5462 }
Owen Taylor3473f882001-02-23 17:55:21 +00005463 } else {
5464 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5466 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
5468
5469 NEXT;
5470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005471 (ctxt->sax->elementDecl != NULL)) {
5472 if (content != NULL)
5473 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005474 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5475 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005476 if ((content != NULL) && (content->parent == NULL)) {
5477 /*
5478 * this is a trick: if xmlAddElementDecl is called,
5479 * instead of copying the full tree it is plugged directly
5480 * if called from the parser. Avoid duplicating the
5481 * interfaces or change the API/ABI
5482 */
5483 xmlFreeDocElementContent(ctxt->myDoc, content);
5484 }
5485 } else if (content != NULL) {
5486 xmlFreeDocElementContent(ctxt->myDoc, content);
5487 }
Owen Taylor3473f882001-02-23 17:55:21 +00005488 }
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
5490 return(ret);
5491}
5492
5493/**
Owen Taylor3473f882001-02-23 17:55:21 +00005494 * xmlParseConditionalSections
5495 * @ctxt: an XML parser context
5496 *
5497 * [61] conditionalSect ::= includeSect | ignoreSect
5498 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5499 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5500 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5501 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5502 */
5503
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005504static void
Owen Taylor3473f882001-02-23 17:55:21 +00005505xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5506 SKIP(3);
5507 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005508 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005509 SKIP(7);
5510 SKIP_BLANKS;
5511 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005512 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005513 } else {
5514 NEXT;
5515 }
5516 if (xmlParserDebugEntities) {
5517 if ((ctxt->input != NULL) && (ctxt->input->filename))
5518 xmlGenericError(xmlGenericErrorContext,
5519 "%s(%d): ", ctxt->input->filename,
5520 ctxt->input->line);
5521 xmlGenericError(xmlGenericErrorContext,
5522 "Entering INCLUDE Conditional Section\n");
5523 }
5524
5525 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5526 (NXT(2) != '>'))) {
5527 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005528 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005529
5530 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5531 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005532 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005533 NEXT;
5534 } else if (RAW == '%') {
5535 xmlParsePEReference(ctxt);
5536 } else
5537 xmlParseMarkupDecl(ctxt);
5538
5539 /*
5540 * Pop-up of finished entities.
5541 */
5542 while ((RAW == 0) && (ctxt->inputNr > 1))
5543 xmlPopInput(ctxt);
5544
Daniel Veillardfdc91562002-07-01 21:52:03 +00005545 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005546 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 break;
5548 }
5549 }
5550 if (xmlParserDebugEntities) {
5551 if ((ctxt->input != NULL) && (ctxt->input->filename))
5552 xmlGenericError(xmlGenericErrorContext,
5553 "%s(%d): ", ctxt->input->filename,
5554 ctxt->input->line);
5555 xmlGenericError(xmlGenericErrorContext,
5556 "Leaving INCLUDE Conditional Section\n");
5557 }
5558
Daniel Veillarda07050d2003-10-19 14:46:32 +00005559 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005560 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005561 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005562 int depth = 0;
5563
5564 SKIP(6);
5565 SKIP_BLANKS;
5566 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005567 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005568 } else {
5569 NEXT;
5570 }
5571 if (xmlParserDebugEntities) {
5572 if ((ctxt->input != NULL) && (ctxt->input->filename))
5573 xmlGenericError(xmlGenericErrorContext,
5574 "%s(%d): ", ctxt->input->filename,
5575 ctxt->input->line);
5576 xmlGenericError(xmlGenericErrorContext,
5577 "Entering IGNORE Conditional Section\n");
5578 }
5579
5580 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005581 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005582 * But disable SAX event generating DTD building in the meantime
5583 */
5584 state = ctxt->disableSAX;
5585 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005586 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005587 ctxt->instate = XML_PARSER_IGNORE;
5588
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005589 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005590 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5591 depth++;
5592 SKIP(3);
5593 continue;
5594 }
5595 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5596 if (--depth >= 0) SKIP(3);
5597 continue;
5598 }
5599 NEXT;
5600 continue;
5601 }
5602
5603 ctxt->disableSAX = state;
5604 ctxt->instate = instate;
5605
5606 if (xmlParserDebugEntities) {
5607 if ((ctxt->input != NULL) && (ctxt->input->filename))
5608 xmlGenericError(xmlGenericErrorContext,
5609 "%s(%d): ", ctxt->input->filename,
5610 ctxt->input->line);
5611 xmlGenericError(xmlGenericErrorContext,
5612 "Leaving IGNORE Conditional Section\n");
5613 }
5614
5615 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005616 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005617 }
5618
5619 if (RAW == 0)
5620 SHRINK;
5621
5622 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005623 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005624 } else {
5625 SKIP(3);
5626 }
5627}
5628
5629/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005630 * xmlParseMarkupDecl:
5631 * @ctxt: an XML parser context
5632 *
5633 * parse Markup declarations
5634 *
5635 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5636 * NotationDecl | PI | Comment
5637 *
5638 * [ VC: Proper Declaration/PE Nesting ]
5639 * Parameter-entity replacement text must be properly nested with
5640 * markup declarations. That is to say, if either the first character
5641 * or the last character of a markup declaration (markupdecl above) is
5642 * contained in the replacement text for a parameter-entity reference,
5643 * both must be contained in the same replacement text.
5644 *
5645 * [ WFC: PEs in Internal Subset ]
5646 * In the internal DTD subset, parameter-entity references can occur
5647 * only where markup declarations can occur, not within markup declarations.
5648 * (This does not apply to references that occur in external parameter
5649 * entities or to the external subset.)
5650 */
5651void
5652xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5653 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005654 if (CUR == '<') {
5655 if (NXT(1) == '!') {
5656 switch (NXT(2)) {
5657 case 'E':
5658 if (NXT(3) == 'L')
5659 xmlParseElementDecl(ctxt);
5660 else if (NXT(3) == 'N')
5661 xmlParseEntityDecl(ctxt);
5662 break;
5663 case 'A':
5664 xmlParseAttributeListDecl(ctxt);
5665 break;
5666 case 'N':
5667 xmlParseNotationDecl(ctxt);
5668 break;
5669 case '-':
5670 xmlParseComment(ctxt);
5671 break;
5672 default:
5673 /* there is an error but it will be detected later */
5674 break;
5675 }
5676 } else if (NXT(1) == '?') {
5677 xmlParsePI(ctxt);
5678 }
5679 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005680 /*
5681 * This is only for internal subset. On external entities,
5682 * the replacement is done before parsing stage
5683 */
5684 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5685 xmlParsePEReference(ctxt);
5686
5687 /*
5688 * Conditional sections are allowed from entities included
5689 * by PE References in the internal subset.
5690 */
5691 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5692 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5693 xmlParseConditionalSections(ctxt);
5694 }
5695 }
5696
5697 ctxt->instate = XML_PARSER_DTD;
5698}
5699
5700/**
5701 * xmlParseTextDecl:
5702 * @ctxt: an XML parser context
5703 *
5704 * parse an XML declaration header for external entities
5705 *
5706 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5707 *
5708 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5709 */
5710
5711void
5712xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5713 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005714 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005715
5716 /*
5717 * We know that '<?xml' is here.
5718 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005719 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005720 SKIP(5);
5721 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005722 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005723 return;
5724 }
5725
William M. Brack76e95df2003-10-18 16:20:14 +00005726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5728 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005729 }
5730 SKIP_BLANKS;
5731
5732 /*
5733 * We may have the VersionInfo here.
5734 */
5735 version = xmlParseVersionInfo(ctxt);
5736 if (version == NULL)
5737 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005738 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5741 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005742 }
5743 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005744 ctxt->input->version = version;
5745
5746 /*
5747 * We must have the encoding declaration
5748 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005749 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005750 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5751 /*
5752 * The XML REC instructs us to stop parsing right here
5753 */
5754 return;
5755 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005756 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5757 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5758 "Missing encoding in text declaration\n");
5759 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005760
5761 SKIP_BLANKS;
5762 if ((RAW == '?') && (NXT(1) == '>')) {
5763 SKIP(2);
5764 } else if (RAW == '>') {
5765 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005766 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005767 NEXT;
5768 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005769 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005770 MOVETO_ENDTAG(CUR_PTR);
5771 NEXT;
5772 }
5773}
5774
5775/**
Owen Taylor3473f882001-02-23 17:55:21 +00005776 * xmlParseExternalSubset:
5777 * @ctxt: an XML parser context
5778 * @ExternalID: the external identifier
5779 * @SystemID: the system identifier (or URL)
5780 *
5781 * parse Markup declarations from an external subset
5782 *
5783 * [30] extSubset ::= textDecl? extSubsetDecl
5784 *
5785 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5786 */
5787void
5788xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5789 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005790 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005791 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005792 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005793 xmlParseTextDecl(ctxt);
5794 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5795 /*
5796 * The XML REC instructs us to stop parsing right here
5797 */
5798 ctxt->instate = XML_PARSER_EOF;
5799 return;
5800 }
5801 }
5802 if (ctxt->myDoc == NULL) {
5803 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5804 }
5805 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5806 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5807
5808 ctxt->instate = XML_PARSER_DTD;
5809 ctxt->external = 1;
5810 while (((RAW == '<') && (NXT(1) == '?')) ||
5811 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005812 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005813 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005814 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005815
5816 GROW;
5817 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5818 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005819 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005820 NEXT;
5821 } else if (RAW == '%') {
5822 xmlParsePEReference(ctxt);
5823 } else
5824 xmlParseMarkupDecl(ctxt);
5825
5826 /*
5827 * Pop-up of finished entities.
5828 */
5829 while ((RAW == 0) && (ctxt->inputNr > 1))
5830 xmlPopInput(ctxt);
5831
Daniel Veillardfdc91562002-07-01 21:52:03 +00005832 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005833 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005834 break;
5835 }
5836 }
5837
5838 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005839 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005840 }
5841
5842}
5843
5844/**
5845 * xmlParseReference:
5846 * @ctxt: an XML parser context
5847 *
5848 * parse and handle entity references in content, depending on the SAX
5849 * interface, this may end-up in a call to character() if this is a
5850 * CharRef, a predefined entity, if there is no reference() callback.
5851 * or if the parser was asked to switch to that mode.
5852 *
5853 * [67] Reference ::= EntityRef | CharRef
5854 */
5855void
5856xmlParseReference(xmlParserCtxtPtr ctxt) {
5857 xmlEntityPtr ent;
5858 xmlChar *val;
5859 if (RAW != '&') return;
5860
5861 if (NXT(1) == '#') {
5862 int i = 0;
5863 xmlChar out[10];
5864 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005865 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005866
5867 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5868 /*
5869 * So we are using non-UTF-8 buffers
5870 * Check that the char fit on 8bits, if not
5871 * generate a CharRef.
5872 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005873 if (value <= 0xFF) {
5874 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005875 out[1] = 0;
5876 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5877 (!ctxt->disableSAX))
5878 ctxt->sax->characters(ctxt->userData, out, 1);
5879 } else {
5880 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005881 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005882 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005883 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005884 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5885 (!ctxt->disableSAX))
5886 ctxt->sax->reference(ctxt->userData, out);
5887 }
5888 } else {
5889 /*
5890 * Just encode the value in UTF-8
5891 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005892 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005893 out[i] = 0;
5894 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5895 (!ctxt->disableSAX))
5896 ctxt->sax->characters(ctxt->userData, out, i);
5897 }
5898 } else {
5899 ent = xmlParseEntityRef(ctxt);
5900 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005901 if (!ctxt->wellFormed)
5902 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005903 if ((ent->name != NULL) &&
5904 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5905 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005906 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005907
5908
5909 /*
5910 * The first reference to the entity trigger a parsing phase
5911 * where the ent->children is filled with the result from
5912 * the parsing.
5913 */
5914 if (ent->children == NULL) {
5915 xmlChar *value;
5916 value = ent->content;
5917
5918 /*
5919 * Check that this entity is well formed
5920 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005921 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005922 (value[1] == 0) && (value[0] == '<') &&
5923 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5924 /*
5925 * DONE: get definite answer on this !!!
5926 * Lots of entity decls are used to declare a single
5927 * char
5928 * <!ENTITY lt "<">
5929 * Which seems to be valid since
5930 * 2.4: The ampersand character (&) and the left angle
5931 * bracket (<) may appear in their literal form only
5932 * when used ... They are also legal within the literal
5933 * entity value of an internal entity declaration;i
5934 * see "4.3.2 Well-Formed Parsed Entities".
5935 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5936 * Looking at the OASIS test suite and James Clark
5937 * tests, this is broken. However the XML REC uses
5938 * it. Is the XML REC not well-formed ????
5939 * This is a hack to avoid this problem
5940 *
5941 * ANSWER: since lt gt amp .. are already defined,
5942 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005943 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005944 * is lousy but acceptable.
5945 */
5946 list = xmlNewDocText(ctxt->myDoc, value);
5947 if (list != NULL) {
5948 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5949 (ent->children == NULL)) {
5950 ent->children = list;
5951 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005952 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005953 list->parent = (xmlNodePtr) ent;
5954 } else {
5955 xmlFreeNodeList(list);
5956 }
5957 } else if (list != NULL) {
5958 xmlFreeNodeList(list);
5959 }
5960 } else {
5961 /*
5962 * 4.3.2: An internal general parsed entity is well-formed
5963 * if its replacement text matches the production labeled
5964 * content.
5965 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005966
5967 void *user_data;
5968 /*
5969 * This is a bit hackish but this seems the best
5970 * way to make sure both SAX and DOM entity support
5971 * behaves okay.
5972 */
5973 if (ctxt->userData == ctxt)
5974 user_data = NULL;
5975 else
5976 user_data = ctxt->userData;
5977
Owen Taylor3473f882001-02-23 17:55:21 +00005978 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5979 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005980 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5981 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005982 ctxt->depth--;
5983 } else if (ent->etype ==
5984 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5985 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005986 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005987 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005988 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005989 ctxt->depth--;
5990 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005991 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005992 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5993 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005994 }
5995 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005996 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005997 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005998 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005999 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6000 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006001 (ent->children == NULL)) {
6002 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006003 if (ctxt->replaceEntities) {
6004 /*
6005 * Prune it directly in the generated document
6006 * except for single text nodes.
6007 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006008 if (((list->type == XML_TEXT_NODE) &&
6009 (list->next == NULL)) ||
6010 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006011 list->parent = (xmlNodePtr) ent;
6012 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006013 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006014 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006015 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006016 while (list != NULL) {
6017 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006018 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006019 if (list->next == NULL)
6020 ent->last = list;
6021 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006022 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006023 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006024#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006025 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6026 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006027#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006028 }
6029 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006030 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006031 while (list != NULL) {
6032 list->parent = (xmlNodePtr) ent;
6033 if (list->next == NULL)
6034 ent->last = list;
6035 list = list->next;
6036 }
Owen Taylor3473f882001-02-23 17:55:21 +00006037 }
6038 } else {
6039 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006040 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006041 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006042 } else if ((ret != XML_ERR_OK) &&
6043 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006044 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006045 } else if (list != NULL) {
6046 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006047 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006048 }
6049 }
6050 }
6051 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6052 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6053 /*
6054 * Create a node.
6055 */
6056 ctxt->sax->reference(ctxt->userData, ent->name);
6057 return;
6058 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006059 /*
6060 * There is a problem on the handling of _private for entities
6061 * (bug 155816): Should we copy the content of the field from
6062 * the entity (possibly overwriting some value set by the user
6063 * when a copy is created), should we leave it alone, or should
6064 * we try to take care of different situations? The problem
6065 * is exacerbated by the usage of this field by the xmlReader.
6066 * To fix this bug, we look at _private on the created node
6067 * and, if it's NULL, we copy in whatever was in the entity.
6068 * If it's not NULL we leave it alone. This is somewhat of a
6069 * hack - maybe we should have further tests to determine
6070 * what to do.
6071 */
Owen Taylor3473f882001-02-23 17:55:21 +00006072 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6073 /*
6074 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006075 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006076 * In the first occurrence list contains the replacement.
6077 * progressive == 2 means we are operating on the Reader
6078 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006079 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006080 if (((list == NULL) && (ent->owner == 0)) ||
6081 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006082 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006083
6084 /*
6085 * when operating on a reader, the entities definitions
6086 * are always owning the entities subtree.
6087 if (ctxt->parseMode == XML_PARSE_READER)
6088 ent->owner = 1;
6089 */
6090
Daniel Veillard62f313b2001-07-04 19:49:14 +00006091 cur = ent->children;
6092 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006093 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006094 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006095 if (nw->_private == NULL)
6096 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006097 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006098 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006099 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006100 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006101 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006102 if (cur == ent->last) {
6103 /*
6104 * needed to detect some strange empty
6105 * node cases in the reader tests
6106 */
6107 if ((ctxt->parseMode == XML_PARSE_READER) &&
6108 (nw->type == XML_ELEMENT_NODE) &&
6109 (nw->children == NULL))
6110 nw->extra = 1;
6111
Daniel Veillard62f313b2001-07-04 19:49:14 +00006112 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006113 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006114 cur = cur->next;
6115 }
Daniel Veillard81273902003-09-30 00:43:48 +00006116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006117 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006118 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006119#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006120 } else if (list == NULL) {
6121 xmlNodePtr nw = NULL, cur, next, last,
6122 firstChild = NULL;
6123 /*
6124 * Copy the entity child list and make it the new
6125 * entity child list. The goal is to make sure any
6126 * ID or REF referenced will be the one from the
6127 * document content and not the entity copy.
6128 */
6129 cur = ent->children;
6130 ent->children = NULL;
6131 last = ent->last;
6132 ent->last = NULL;
6133 while (cur != NULL) {
6134 next = cur->next;
6135 cur->next = NULL;
6136 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006137 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006138 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006139 if (nw->_private == NULL)
6140 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006141 if (firstChild == NULL){
6142 firstChild = cur;
6143 }
6144 xmlAddChild((xmlNodePtr) ent, nw);
6145 xmlAddChild(ctxt->node, cur);
6146 }
6147 if (cur == last)
6148 break;
6149 cur = next;
6150 }
6151 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006152#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006153 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6154 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006155#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006156 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006157 const xmlChar *nbktext;
6158
Daniel Veillard62f313b2001-07-04 19:49:14 +00006159 /*
6160 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006161 * node with a possible previous text one which
6162 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006163 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006164 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6165 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006166 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006167 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006168 if ((ent->last != ent->children) &&
6169 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006170 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006171 xmlAddChildList(ctxt->node, ent->children);
6172 }
6173
Owen Taylor3473f882001-02-23 17:55:21 +00006174 /*
6175 * This is to avoid a nasty side effect, see
6176 * characters() in SAX.c
6177 */
6178 ctxt->nodemem = 0;
6179 ctxt->nodelen = 0;
6180 return;
6181 } else {
6182 /*
6183 * Probably running in SAX mode
6184 */
6185 xmlParserInputPtr input;
6186
6187 input = xmlNewEntityInputStream(ctxt, ent);
6188 xmlPushInput(ctxt, input);
6189 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006190 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6191 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006192 xmlParseTextDecl(ctxt);
6193 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6194 /*
6195 * The XML REC instructs us to stop parsing right here
6196 */
6197 ctxt->instate = XML_PARSER_EOF;
6198 return;
6199 }
6200 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006201 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6202 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 }
6204 }
6205 return;
6206 }
6207 }
6208 } else {
6209 val = ent->content;
6210 if (val == NULL) return;
6211 /*
6212 * inline the entity.
6213 */
6214 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6215 (!ctxt->disableSAX))
6216 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6217 }
6218 }
6219}
6220
6221/**
6222 * xmlParseEntityRef:
6223 * @ctxt: an XML parser context
6224 *
6225 * parse ENTITY references declarations
6226 *
6227 * [68] EntityRef ::= '&' Name ';'
6228 *
6229 * [ WFC: Entity Declared ]
6230 * In a document without any DTD, a document with only an internal DTD
6231 * subset which contains no parameter entity references, or a document
6232 * with "standalone='yes'", the Name given in the entity reference
6233 * must match that in an entity declaration, except that well-formed
6234 * documents need not declare any of the following entities: amp, lt,
6235 * gt, apos, quot. The declaration of a parameter entity must precede
6236 * any reference to it. Similarly, the declaration of a general entity
6237 * must precede any reference to it which appears in a default value in an
6238 * attribute-list declaration. Note that if entities are declared in the
6239 * external subset or in external parameter entities, a non-validating
6240 * processor is not obligated to read and process their declarations;
6241 * for such documents, the rule that an entity must be declared is a
6242 * well-formedness constraint only if standalone='yes'.
6243 *
6244 * [ WFC: Parsed Entity ]
6245 * An entity reference must not contain the name of an unparsed entity
6246 *
6247 * Returns the xmlEntityPtr if found, or NULL otherwise.
6248 */
6249xmlEntityPtr
6250xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006251 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006252 xmlEntityPtr ent = NULL;
6253
6254 GROW;
6255
6256 if (RAW == '&') {
6257 NEXT;
6258 name = xmlParseName(ctxt);
6259 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006260 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6261 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006262 } else {
6263 if (RAW == ';') {
6264 NEXT;
6265 /*
6266 * Ask first SAX for entity resolution, otherwise try the
6267 * predefined set.
6268 */
6269 if (ctxt->sax != NULL) {
6270 if (ctxt->sax->getEntity != NULL)
6271 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006272 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006273 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006274 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6275 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006276 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006277 }
Owen Taylor3473f882001-02-23 17:55:21 +00006278 }
6279 /*
6280 * [ WFC: Entity Declared ]
6281 * In a document without any DTD, a document with only an
6282 * internal DTD subset which contains no parameter entity
6283 * references, or a document with "standalone='yes'", the
6284 * Name given in the entity reference must match that in an
6285 * entity declaration, except that well-formed documents
6286 * need not declare any of the following entities: amp, lt,
6287 * gt, apos, quot.
6288 * The declaration of a parameter entity must precede any
6289 * reference to it.
6290 * Similarly, the declaration of a general entity must
6291 * precede any reference to it which appears in a default
6292 * value in an attribute-list declaration. Note that if
6293 * entities are declared in the external subset or in
6294 * external parameter entities, a non-validating processor
6295 * is not obligated to read and process their declarations;
6296 * for such documents, the rule that an entity must be
6297 * declared is a well-formedness constraint only if
6298 * standalone='yes'.
6299 */
6300 if (ent == NULL) {
6301 if ((ctxt->standalone == 1) ||
6302 ((ctxt->hasExternalSubset == 0) &&
6303 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006304 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006305 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006307 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006308 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006309 if ((ctxt->inSubset == 0) &&
6310 (ctxt->sax != NULL) &&
6311 (ctxt->sax->reference != NULL)) {
6312 ctxt->sax->reference(ctxt, name);
6313 }
Owen Taylor3473f882001-02-23 17:55:21 +00006314 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006315 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006316 }
6317
6318 /*
6319 * [ WFC: Parsed Entity ]
6320 * An entity reference must not contain the name of an
6321 * unparsed entity
6322 */
6323 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006324 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006325 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006326 }
6327
6328 /*
6329 * [ WFC: No External Entity References ]
6330 * Attribute values cannot contain direct or indirect
6331 * entity references to external entities.
6332 */
6333 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6334 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006335 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6336 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 }
6338 /*
6339 * [ WFC: No < in Attribute Values ]
6340 * The replacement text of any entity referred to directly or
6341 * indirectly in an attribute value (other than "&lt;") must
6342 * not contain a <.
6343 */
6344 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6345 (ent != NULL) &&
6346 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6347 (ent->content != NULL) &&
6348 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006349 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006350 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 }
6352
6353 /*
6354 * Internal check, no parameter entities here ...
6355 */
6356 else {
6357 switch (ent->etype) {
6358 case XML_INTERNAL_PARAMETER_ENTITY:
6359 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006360 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6361 "Attempt to reference the parameter entity '%s'\n",
6362 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006363 break;
6364 default:
6365 break;
6366 }
6367 }
6368
6369 /*
6370 * [ WFC: No Recursion ]
6371 * A parsed entity must not contain a recursive reference
6372 * to itself, either directly or indirectly.
6373 * Done somewhere else
6374 */
6375
6376 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006377 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006378 }
Owen Taylor3473f882001-02-23 17:55:21 +00006379 }
6380 }
6381 return(ent);
6382}
6383
6384/**
6385 * xmlParseStringEntityRef:
6386 * @ctxt: an XML parser context
6387 * @str: a pointer to an index in the string
6388 *
6389 * parse ENTITY references declarations, but this version parses it from
6390 * a string value.
6391 *
6392 * [68] EntityRef ::= '&' Name ';'
6393 *
6394 * [ WFC: Entity Declared ]
6395 * In a document without any DTD, a document with only an internal DTD
6396 * subset which contains no parameter entity references, or a document
6397 * with "standalone='yes'", the Name given in the entity reference
6398 * must match that in an entity declaration, except that well-formed
6399 * documents need not declare any of the following entities: amp, lt,
6400 * gt, apos, quot. The declaration of a parameter entity must precede
6401 * any reference to it. Similarly, the declaration of a general entity
6402 * must precede any reference to it which appears in a default value in an
6403 * attribute-list declaration. Note that if entities are declared in the
6404 * external subset or in external parameter entities, a non-validating
6405 * processor is not obligated to read and process their declarations;
6406 * for such documents, the rule that an entity must be declared is a
6407 * well-formedness constraint only if standalone='yes'.
6408 *
6409 * [ WFC: Parsed Entity ]
6410 * An entity reference must not contain the name of an unparsed entity
6411 *
6412 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6413 * is updated to the current location in the string.
6414 */
6415xmlEntityPtr
6416xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6417 xmlChar *name;
6418 const xmlChar *ptr;
6419 xmlChar cur;
6420 xmlEntityPtr ent = NULL;
6421
6422 if ((str == NULL) || (*str == NULL))
6423 return(NULL);
6424 ptr = *str;
6425 cur = *ptr;
6426 if (cur == '&') {
6427 ptr++;
6428 cur = *ptr;
6429 name = xmlParseStringName(ctxt, &ptr);
6430 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006431 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6432 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006433 } else {
6434 if (*ptr == ';') {
6435 ptr++;
6436 /*
6437 * Ask first SAX for entity resolution, otherwise try the
6438 * predefined set.
6439 */
6440 if (ctxt->sax != NULL) {
6441 if (ctxt->sax->getEntity != NULL)
6442 ent = ctxt->sax->getEntity(ctxt->userData, name);
6443 if (ent == NULL)
6444 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006445 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006446 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006447 }
Owen Taylor3473f882001-02-23 17:55:21 +00006448 }
6449 /*
6450 * [ WFC: Entity Declared ]
6451 * In a document without any DTD, a document with only an
6452 * internal DTD subset which contains no parameter entity
6453 * references, or a document with "standalone='yes'", the
6454 * Name given in the entity reference must match that in an
6455 * entity declaration, except that well-formed documents
6456 * need not declare any of the following entities: amp, lt,
6457 * gt, apos, quot.
6458 * The declaration of a parameter entity must precede any
6459 * reference to it.
6460 * Similarly, the declaration of a general entity must
6461 * precede any reference to it which appears in a default
6462 * value in an attribute-list declaration. Note that if
6463 * entities are declared in the external subset or in
6464 * external parameter entities, a non-validating processor
6465 * is not obligated to read and process their declarations;
6466 * for such documents, the rule that an entity must be
6467 * declared is a well-formedness constraint only if
6468 * standalone='yes'.
6469 */
6470 if (ent == NULL) {
6471 if ((ctxt->standalone == 1) ||
6472 ((ctxt->hasExternalSubset == 0) &&
6473 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006474 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006475 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006476 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006477 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006478 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006479 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006481 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006482 }
6483
6484 /*
6485 * [ WFC: Parsed Entity ]
6486 * An entity reference must not contain the name of an
6487 * unparsed entity
6488 */
6489 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006490 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006491 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 }
6493
6494 /*
6495 * [ WFC: No External Entity References ]
6496 * Attribute values cannot contain direct or indirect
6497 * entity references to external entities.
6498 */
6499 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6500 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006501 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006502 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006503 }
6504 /*
6505 * [ WFC: No < in Attribute Values ]
6506 * The replacement text of any entity referred to directly or
6507 * indirectly in an attribute value (other than "&lt;") must
6508 * not contain a <.
6509 */
6510 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6511 (ent != NULL) &&
6512 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6513 (ent->content != NULL) &&
6514 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006515 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6516 "'<' in entity '%s' is not allowed in attributes values\n",
6517 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006518 }
6519
6520 /*
6521 * Internal check, no parameter entities here ...
6522 */
6523 else {
6524 switch (ent->etype) {
6525 case XML_INTERNAL_PARAMETER_ENTITY:
6526 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006527 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6528 "Attempt to reference the parameter entity '%s'\n",
6529 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006530 break;
6531 default:
6532 break;
6533 }
6534 }
6535
6536 /*
6537 * [ WFC: No Recursion ]
6538 * A parsed entity must not contain a recursive reference
6539 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006540 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006541 */
6542
6543 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006544 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006545 }
6546 xmlFree(name);
6547 }
6548 }
6549 *str = ptr;
6550 return(ent);
6551}
6552
6553/**
6554 * xmlParsePEReference:
6555 * @ctxt: an XML parser context
6556 *
6557 * parse PEReference declarations
6558 * The entity content is handled directly by pushing it's content as
6559 * a new input stream.
6560 *
6561 * [69] PEReference ::= '%' Name ';'
6562 *
6563 * [ WFC: No Recursion ]
6564 * A parsed entity must not contain a recursive
6565 * reference to itself, either directly or indirectly.
6566 *
6567 * [ WFC: Entity Declared ]
6568 * In a document without any DTD, a document with only an internal DTD
6569 * subset which contains no parameter entity references, or a document
6570 * with "standalone='yes'", ... ... The declaration of a parameter
6571 * entity must precede any reference to it...
6572 *
6573 * [ VC: Entity Declared ]
6574 * In a document with an external subset or external parameter entities
6575 * with "standalone='no'", ... ... The declaration of a parameter entity
6576 * must precede any reference to it...
6577 *
6578 * [ WFC: In DTD ]
6579 * Parameter-entity references may only appear in the DTD.
6580 * NOTE: misleading but this is handled.
6581 */
6582void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006583xmlParsePEReference(xmlParserCtxtPtr ctxt)
6584{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006585 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006586 xmlEntityPtr entity = NULL;
6587 xmlParserInputPtr input;
6588
6589 if (RAW == '%') {
6590 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006591 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006592 if (name == NULL) {
6593 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6594 "xmlParsePEReference: no name\n");
6595 } else {
6596 if (RAW == ';') {
6597 NEXT;
6598 if ((ctxt->sax != NULL) &&
6599 (ctxt->sax->getParameterEntity != NULL))
6600 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6601 name);
6602 if (entity == NULL) {
6603 /*
6604 * [ WFC: Entity Declared ]
6605 * In a document without any DTD, a document with only an
6606 * internal DTD subset which contains no parameter entity
6607 * references, or a document with "standalone='yes'", ...
6608 * ... The declaration of a parameter entity must precede
6609 * any reference to it...
6610 */
6611 if ((ctxt->standalone == 1) ||
6612 ((ctxt->hasExternalSubset == 0) &&
6613 (ctxt->hasPErefs == 0))) {
6614 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6615 "PEReference: %%%s; not found\n",
6616 name);
6617 } else {
6618 /*
6619 * [ VC: Entity Declared ]
6620 * In a document with an external subset or external
6621 * parameter entities with "standalone='no'", ...
6622 * ... The declaration of a parameter entity must
6623 * precede any reference to it...
6624 */
6625 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6626 "PEReference: %%%s; not found\n",
6627 name, NULL);
6628 ctxt->valid = 0;
6629 }
6630 } else {
6631 /*
6632 * Internal checking in case the entity quest barfed
6633 */
6634 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6635 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6636 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6637 "Internal: %%%s; is not a parameter entity\n",
6638 name, NULL);
6639 } else if (ctxt->input->free != deallocblankswrapper) {
6640 input =
6641 xmlNewBlanksWrapperInputStream(ctxt, entity);
6642 xmlPushInput(ctxt, input);
6643 } else {
6644 /*
6645 * TODO !!!
6646 * handle the extra spaces added before and after
6647 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6648 */
6649 input = xmlNewEntityInputStream(ctxt, entity);
6650 xmlPushInput(ctxt, input);
6651 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006652 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006653 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006654 xmlParseTextDecl(ctxt);
6655 if (ctxt->errNo ==
6656 XML_ERR_UNSUPPORTED_ENCODING) {
6657 /*
6658 * The XML REC instructs us to stop parsing
6659 * right here
6660 */
6661 ctxt->instate = XML_PARSER_EOF;
6662 return;
6663 }
6664 }
6665 }
6666 }
6667 ctxt->hasPErefs = 1;
6668 } else {
6669 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6670 }
6671 }
Owen Taylor3473f882001-02-23 17:55:21 +00006672 }
6673}
6674
6675/**
6676 * xmlParseStringPEReference:
6677 * @ctxt: an XML parser context
6678 * @str: a pointer to an index in the string
6679 *
6680 * parse PEReference declarations
6681 *
6682 * [69] PEReference ::= '%' Name ';'
6683 *
6684 * [ WFC: No Recursion ]
6685 * A parsed entity must not contain a recursive
6686 * reference to itself, either directly or indirectly.
6687 *
6688 * [ WFC: Entity Declared ]
6689 * In a document without any DTD, a document with only an internal DTD
6690 * subset which contains no parameter entity references, or a document
6691 * with "standalone='yes'", ... ... The declaration of a parameter
6692 * entity must precede any reference to it...
6693 *
6694 * [ VC: Entity Declared ]
6695 * In a document with an external subset or external parameter entities
6696 * with "standalone='no'", ... ... The declaration of a parameter entity
6697 * must precede any reference to it...
6698 *
6699 * [ WFC: In DTD ]
6700 * Parameter-entity references may only appear in the DTD.
6701 * NOTE: misleading but this is handled.
6702 *
6703 * Returns the string of the entity content.
6704 * str is updated to the current value of the index
6705 */
6706xmlEntityPtr
6707xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6708 const xmlChar *ptr;
6709 xmlChar cur;
6710 xmlChar *name;
6711 xmlEntityPtr entity = NULL;
6712
6713 if ((str == NULL) || (*str == NULL)) return(NULL);
6714 ptr = *str;
6715 cur = *ptr;
6716 if (cur == '%') {
6717 ptr++;
6718 cur = *ptr;
6719 name = xmlParseStringName(ctxt, &ptr);
6720 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006721 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6722 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006723 } else {
6724 cur = *ptr;
6725 if (cur == ';') {
6726 ptr++;
6727 cur = *ptr;
6728 if ((ctxt->sax != NULL) &&
6729 (ctxt->sax->getParameterEntity != NULL))
6730 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6731 name);
6732 if (entity == NULL) {
6733 /*
6734 * [ WFC: Entity Declared ]
6735 * In a document without any DTD, a document with only an
6736 * internal DTD subset which contains no parameter entity
6737 * references, or a document with "standalone='yes'", ...
6738 * ... The declaration of a parameter entity must precede
6739 * any reference to it...
6740 */
6741 if ((ctxt->standalone == 1) ||
6742 ((ctxt->hasExternalSubset == 0) &&
6743 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006744 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006745 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006746 } else {
6747 /*
6748 * [ VC: Entity Declared ]
6749 * In a document with an external subset or external
6750 * parameter entities with "standalone='no'", ...
6751 * ... The declaration of a parameter entity must
6752 * precede any reference to it...
6753 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006754 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6755 "PEReference: %%%s; not found\n",
6756 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006757 ctxt->valid = 0;
6758 }
6759 } else {
6760 /*
6761 * Internal checking in case the entity quest barfed
6762 */
6763 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6764 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006765 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6766 "%%%s; is not a parameter entity\n",
6767 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006768 }
6769 }
6770 ctxt->hasPErefs = 1;
6771 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006772 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006773 }
6774 xmlFree(name);
6775 }
6776 }
6777 *str = ptr;
6778 return(entity);
6779}
6780
6781/**
6782 * xmlParseDocTypeDecl:
6783 * @ctxt: an XML parser context
6784 *
6785 * parse a DOCTYPE declaration
6786 *
6787 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6788 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6789 *
6790 * [ VC: Root Element Type ]
6791 * The Name in the document type declaration must match the element
6792 * type of the root element.
6793 */
6794
6795void
6796xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006797 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 xmlChar *ExternalID = NULL;
6799 xmlChar *URI = NULL;
6800
6801 /*
6802 * We know that '<!DOCTYPE' has been detected.
6803 */
6804 SKIP(9);
6805
6806 SKIP_BLANKS;
6807
6808 /*
6809 * Parse the DOCTYPE name.
6810 */
6811 name = xmlParseName(ctxt);
6812 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006813 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6814 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006815 }
6816 ctxt->intSubName = name;
6817
6818 SKIP_BLANKS;
6819
6820 /*
6821 * Check for SystemID and ExternalID
6822 */
6823 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6824
6825 if ((URI != NULL) || (ExternalID != NULL)) {
6826 ctxt->hasExternalSubset = 1;
6827 }
6828 ctxt->extSubURI = URI;
6829 ctxt->extSubSystem = ExternalID;
6830
6831 SKIP_BLANKS;
6832
6833 /*
6834 * Create and update the internal subset.
6835 */
6836 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6837 (!ctxt->disableSAX))
6838 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6839
6840 /*
6841 * Is there any internal subset declarations ?
6842 * they are handled separately in xmlParseInternalSubset()
6843 */
6844 if (RAW == '[')
6845 return;
6846
6847 /*
6848 * We should be at the end of the DOCTYPE declaration.
6849 */
6850 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006851 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006852 }
6853 NEXT;
6854}
6855
6856/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006857 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006858 * @ctxt: an XML parser context
6859 *
6860 * parse the internal subset declaration
6861 *
6862 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6863 */
6864
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006865static void
Owen Taylor3473f882001-02-23 17:55:21 +00006866xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6867 /*
6868 * Is there any DTD definition ?
6869 */
6870 if (RAW == '[') {
6871 ctxt->instate = XML_PARSER_DTD;
6872 NEXT;
6873 /*
6874 * Parse the succession of Markup declarations and
6875 * PEReferences.
6876 * Subsequence (markupdecl | PEReference | S)*
6877 */
6878 while (RAW != ']') {
6879 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006880 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006881
6882 SKIP_BLANKS;
6883 xmlParseMarkupDecl(ctxt);
6884 xmlParsePEReference(ctxt);
6885
6886 /*
6887 * Pop-up of finished entities.
6888 */
6889 while ((RAW == 0) && (ctxt->inputNr > 1))
6890 xmlPopInput(ctxt);
6891
6892 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006893 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006894 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006895 break;
6896 }
6897 }
6898 if (RAW == ']') {
6899 NEXT;
6900 SKIP_BLANKS;
6901 }
6902 }
6903
6904 /*
6905 * We should be at the end of the DOCTYPE declaration.
6906 */
6907 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006908 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006909 }
6910 NEXT;
6911}
6912
Daniel Veillard81273902003-09-30 00:43:48 +00006913#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006914/**
6915 * xmlParseAttribute:
6916 * @ctxt: an XML parser context
6917 * @value: a xmlChar ** used to store the value of the attribute
6918 *
6919 * parse an attribute
6920 *
6921 * [41] Attribute ::= Name Eq AttValue
6922 *
6923 * [ WFC: No External Entity References ]
6924 * Attribute values cannot contain direct or indirect entity references
6925 * to external entities.
6926 *
6927 * [ WFC: No < in Attribute Values ]
6928 * The replacement text of any entity referred to directly or indirectly in
6929 * an attribute value (other than "&lt;") must not contain a <.
6930 *
6931 * [ VC: Attribute Value Type ]
6932 * The attribute must have been declared; the value must be of the type
6933 * declared for it.
6934 *
6935 * [25] Eq ::= S? '=' S?
6936 *
6937 * With namespace:
6938 *
6939 * [NS 11] Attribute ::= QName Eq AttValue
6940 *
6941 * Also the case QName == xmlns:??? is handled independently as a namespace
6942 * definition.
6943 *
6944 * Returns the attribute name, and the value in *value.
6945 */
6946
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006947const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006948xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006949 const xmlChar *name;
6950 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006951
6952 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006953 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006954 name = xmlParseName(ctxt);
6955 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006956 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006957 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006958 return(NULL);
6959 }
6960
6961 /*
6962 * read the value
6963 */
6964 SKIP_BLANKS;
6965 if (RAW == '=') {
6966 NEXT;
6967 SKIP_BLANKS;
6968 val = xmlParseAttValue(ctxt);
6969 ctxt->instate = XML_PARSER_CONTENT;
6970 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006971 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006972 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 return(NULL);
6974 }
6975
6976 /*
6977 * Check that xml:lang conforms to the specification
6978 * No more registered as an error, just generate a warning now
6979 * since this was deprecated in XML second edition
6980 */
6981 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6982 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006983 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6984 "Malformed value for xml:lang : %s\n",
6985 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006986 }
6987 }
6988
6989 /*
6990 * Check that xml:space conforms to the specification
6991 */
6992 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6993 if (xmlStrEqual(val, BAD_CAST "default"))
6994 *(ctxt->space) = 0;
6995 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6996 *(ctxt->space) = 1;
6997 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00006998 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006999"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007000 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 }
7002 }
7003
7004 *value = val;
7005 return(name);
7006}
7007
7008/**
7009 * xmlParseStartTag:
7010 * @ctxt: an XML parser context
7011 *
7012 * parse a start of tag either for rule element or
7013 * EmptyElement. In both case we don't parse the tag closing chars.
7014 *
7015 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7016 *
7017 * [ WFC: Unique Att Spec ]
7018 * No attribute name may appear more than once in the same start-tag or
7019 * empty-element tag.
7020 *
7021 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7022 *
7023 * [ WFC: Unique Att Spec ]
7024 * No attribute name may appear more than once in the same start-tag or
7025 * empty-element tag.
7026 *
7027 * With namespace:
7028 *
7029 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7030 *
7031 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7032 *
7033 * Returns the element name parsed
7034 */
7035
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007036const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007037xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007038 const xmlChar *name;
7039 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007040 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007041 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007042 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007043 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007044 int i;
7045
7046 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007047 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007048
7049 name = xmlParseName(ctxt);
7050 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007051 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007052 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007053 return(NULL);
7054 }
7055
7056 /*
7057 * Now parse the attributes, it ends up with the ending
7058 *
7059 * (S Attribute)* S?
7060 */
7061 SKIP_BLANKS;
7062 GROW;
7063
Daniel Veillard21a0f912001-02-25 19:54:14 +00007064 while ((RAW != '>') &&
7065 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007066 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007067 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007068 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007069
7070 attname = xmlParseAttribute(ctxt, &attvalue);
7071 if ((attname != NULL) && (attvalue != NULL)) {
7072 /*
7073 * [ WFC: Unique Att Spec ]
7074 * No attribute name may appear more than once in the same
7075 * start-tag or empty-element tag.
7076 */
7077 for (i = 0; i < nbatts;i += 2) {
7078 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007079 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007080 xmlFree(attvalue);
7081 goto failed;
7082 }
7083 }
Owen Taylor3473f882001-02-23 17:55:21 +00007084 /*
7085 * Add the pair to atts
7086 */
7087 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007088 maxatts = 22; /* allow for 10 attrs by default */
7089 atts = (const xmlChar **)
7090 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007091 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007092 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007093 if (attvalue != NULL)
7094 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007095 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007096 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007097 ctxt->atts = atts;
7098 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007099 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007100 const xmlChar **n;
7101
Owen Taylor3473f882001-02-23 17:55:21 +00007102 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007103 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007104 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007105 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007106 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007107 if (attvalue != NULL)
7108 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007109 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007110 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007111 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007112 ctxt->atts = atts;
7113 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007114 }
7115 atts[nbatts++] = attname;
7116 atts[nbatts++] = attvalue;
7117 atts[nbatts] = NULL;
7118 atts[nbatts + 1] = NULL;
7119 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007120 if (attvalue != NULL)
7121 xmlFree(attvalue);
7122 }
7123
7124failed:
7125
Daniel Veillard3772de32002-12-17 10:31:45 +00007126 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007127 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7128 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007129 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007130 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7131 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007132 }
7133 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007134 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7135 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007136 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7137 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007138 break;
7139 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007140 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007141 GROW;
7142 }
7143
7144 /*
7145 * SAX: Start of Element !
7146 */
7147 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007148 (!ctxt->disableSAX)) {
7149 if (nbatts > 0)
7150 ctxt->sax->startElement(ctxt->userData, name, atts);
7151 else
7152 ctxt->sax->startElement(ctxt->userData, name, NULL);
7153 }
Owen Taylor3473f882001-02-23 17:55:21 +00007154
7155 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007156 /* Free only the content strings */
7157 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007158 if (atts[i] != NULL)
7159 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007160 }
7161 return(name);
7162}
7163
7164/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007165 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007166 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007167 * @line: line of the start tag
7168 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007169 *
7170 * parse an end of tag
7171 *
7172 * [42] ETag ::= '</' Name S? '>'
7173 *
7174 * With namespace
7175 *
7176 * [NS 9] ETag ::= '</' QName S? '>'
7177 */
7178
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007179static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007180xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007181 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007182
7183 GROW;
7184 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007185 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007186 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007187 return;
7188 }
7189 SKIP(2);
7190
Daniel Veillard46de64e2002-05-29 08:21:33 +00007191 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007192
7193 /*
7194 * We should definitely be at the ending "S? '>'" part
7195 */
7196 GROW;
7197 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007198 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007199 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007200 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007201 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007202
7203 /*
7204 * [ WFC: Element Type Match ]
7205 * The Name in an element's end-tag must match the element type in the
7206 * start-tag.
7207 *
7208 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007209 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007210 if (name == NULL) name = BAD_CAST "unparseable";
7211 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007212 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007213 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007214 }
7215
7216 /*
7217 * SAX: End of Tag
7218 */
7219 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7220 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007221 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007222
Daniel Veillarde57ec792003-09-10 10:50:59 +00007223 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007224 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007225 return;
7226}
7227
7228/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007229 * xmlParseEndTag:
7230 * @ctxt: an XML parser context
7231 *
7232 * parse an end of tag
7233 *
7234 * [42] ETag ::= '</' Name S? '>'
7235 *
7236 * With namespace
7237 *
7238 * [NS 9] ETag ::= '</' QName S? '>'
7239 */
7240
7241void
7242xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007243 xmlParseEndTag1(ctxt, 0);
7244}
Daniel Veillard81273902003-09-30 00:43:48 +00007245#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007246
7247/************************************************************************
7248 * *
7249 * SAX 2 specific operations *
7250 * *
7251 ************************************************************************/
7252
7253static const xmlChar *
7254xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7255 int len = 0, l;
7256 int c;
7257 int count = 0;
7258
7259 /*
7260 * Handler for more complex cases
7261 */
7262 GROW;
7263 c = CUR_CHAR(l);
7264 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007265 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266 return(NULL);
7267 }
7268
7269 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007270 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007271 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007272 (IS_COMBINING(c)) ||
7273 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274 if (count++ > 100) {
7275 count = 0;
7276 GROW;
7277 }
7278 len += l;
7279 NEXTL(l);
7280 c = CUR_CHAR(l);
7281 }
7282 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7283}
7284
7285/*
7286 * xmlGetNamespace:
7287 * @ctxt: an XML parser context
7288 * @prefix: the prefix to lookup
7289 *
7290 * Lookup the namespace name for the @prefix (which ca be NULL)
7291 * The prefix must come from the @ctxt->dict dictionnary
7292 *
7293 * Returns the namespace name or NULL if not bound
7294 */
7295static const xmlChar *
7296xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7297 int i;
7298
Daniel Veillarde57ec792003-09-10 10:50:59 +00007299 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007300 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007301 if (ctxt->nsTab[i] == prefix) {
7302 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7303 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007304 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007305 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007306 return(NULL);
7307}
7308
7309/**
7310 * xmlParseNCName:
7311 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007312 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007313 *
7314 * parse an XML name.
7315 *
7316 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7317 * CombiningChar | Extender
7318 *
7319 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7320 *
7321 * Returns the Name parsed or NULL
7322 */
7323
7324static const xmlChar *
7325xmlParseNCName(xmlParserCtxtPtr ctxt) {
7326 const xmlChar *in;
7327 const xmlChar *ret;
7328 int count = 0;
7329
7330 /*
7331 * Accelerator for simple ASCII names
7332 */
7333 in = ctxt->input->cur;
7334 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7335 ((*in >= 0x41) && (*in <= 0x5A)) ||
7336 (*in == '_')) {
7337 in++;
7338 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7339 ((*in >= 0x41) && (*in <= 0x5A)) ||
7340 ((*in >= 0x30) && (*in <= 0x39)) ||
7341 (*in == '_') || (*in == '-') ||
7342 (*in == '.'))
7343 in++;
7344 if ((*in > 0) && (*in < 0x80)) {
7345 count = in - ctxt->input->cur;
7346 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7347 ctxt->input->cur = in;
7348 ctxt->nbChars += count;
7349 ctxt->input->col += count;
7350 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007351 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007352 }
7353 return(ret);
7354 }
7355 }
7356 return(xmlParseNCNameComplex(ctxt));
7357}
7358
7359/**
7360 * xmlParseQName:
7361 * @ctxt: an XML parser context
7362 * @prefix: pointer to store the prefix part
7363 *
7364 * parse an XML Namespace QName
7365 *
7366 * [6] QName ::= (Prefix ':')? LocalPart
7367 * [7] Prefix ::= NCName
7368 * [8] LocalPart ::= NCName
7369 *
7370 * Returns the Name parsed or NULL
7371 */
7372
7373static const xmlChar *
7374xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7375 const xmlChar *l, *p;
7376
7377 GROW;
7378
7379 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007380 if (l == NULL) {
7381 if (CUR == ':') {
7382 l = xmlParseName(ctxt);
7383 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007384 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7385 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007386 *prefix = NULL;
7387 return(l);
7388 }
7389 }
7390 return(NULL);
7391 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007392 if (CUR == ':') {
7393 NEXT;
7394 p = l;
7395 l = xmlParseNCName(ctxt);
7396 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007397 xmlChar *tmp;
7398
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007399 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7400 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007401 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7402 p = xmlDictLookup(ctxt->dict, tmp, -1);
7403 if (tmp != NULL) xmlFree(tmp);
7404 *prefix = NULL;
7405 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007406 }
7407 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007408 xmlChar *tmp;
7409
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007410 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7411 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007412 NEXT;
7413 tmp = (xmlChar *) xmlParseName(ctxt);
7414 if (tmp != NULL) {
7415 tmp = xmlBuildQName(tmp, l, NULL, 0);
7416 l = xmlDictLookup(ctxt->dict, tmp, -1);
7417 if (tmp != NULL) xmlFree(tmp);
7418 *prefix = p;
7419 return(l);
7420 }
7421 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7422 l = xmlDictLookup(ctxt->dict, tmp, -1);
7423 if (tmp != NULL) xmlFree(tmp);
7424 *prefix = p;
7425 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 }
7427 *prefix = p;
7428 } else
7429 *prefix = NULL;
7430 return(l);
7431}
7432
7433/**
7434 * xmlParseQNameAndCompare:
7435 * @ctxt: an XML parser context
7436 * @name: the localname
7437 * @prefix: the prefix, if any.
7438 *
7439 * parse an XML name and compares for match
7440 * (specialized for endtag parsing)
7441 *
7442 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7443 * and the name for mismatch
7444 */
7445
7446static const xmlChar *
7447xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7448 xmlChar const *prefix) {
7449 const xmlChar *cmp = name;
7450 const xmlChar *in;
7451 const xmlChar *ret;
7452 const xmlChar *prefix2;
7453
7454 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7455
7456 GROW;
7457 in = ctxt->input->cur;
7458
7459 cmp = prefix;
7460 while (*in != 0 && *in == *cmp) {
7461 ++in;
7462 ++cmp;
7463 }
7464 if ((*cmp == 0) && (*in == ':')) {
7465 in++;
7466 cmp = name;
7467 while (*in != 0 && *in == *cmp) {
7468 ++in;
7469 ++cmp;
7470 }
William M. Brack76e95df2003-10-18 16:20:14 +00007471 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472 /* success */
7473 ctxt->input->cur = in;
7474 return((const xmlChar*) 1);
7475 }
7476 }
7477 /*
7478 * all strings coms from the dictionary, equality can be done directly
7479 */
7480 ret = xmlParseQName (ctxt, &prefix2);
7481 if ((ret == name) && (prefix == prefix2))
7482 return((const xmlChar*) 1);
7483 return ret;
7484}
7485
7486/**
7487 * xmlParseAttValueInternal:
7488 * @ctxt: an XML parser context
7489 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007490 * @alloc: whether the attribute was reallocated as a new string
7491 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007492 *
7493 * parse a value for an attribute.
7494 * NOTE: if no normalization is needed, the routine will return pointers
7495 * directly from the data buffer.
7496 *
7497 * 3.3.3 Attribute-Value Normalization:
7498 * Before the value of an attribute is passed to the application or
7499 * checked for validity, the XML processor must normalize it as follows:
7500 * - a character reference is processed by appending the referenced
7501 * character to the attribute value
7502 * - an entity reference is processed by recursively processing the
7503 * replacement text of the entity
7504 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7505 * appending #x20 to the normalized value, except that only a single
7506 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7507 * parsed entity or the literal entity value of an internal parsed entity
7508 * - other characters are processed by appending them to the normalized value
7509 * If the declared value is not CDATA, then the XML processor must further
7510 * process the normalized attribute value by discarding any leading and
7511 * trailing space (#x20) characters, and by replacing sequences of space
7512 * (#x20) characters by a single space (#x20) character.
7513 * All attributes for which no declaration has been read should be treated
7514 * by a non-validating parser as if declared CDATA.
7515 *
7516 * Returns the AttValue parsed or NULL. The value has to be freed by the
7517 * caller if it was copied, this can be detected by val[*len] == 0.
7518 */
7519
7520static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007521xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7522 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007523{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007525 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007526 xmlChar *ret = NULL;
7527
7528 GROW;
7529 in = (xmlChar *) CUR_PTR;
7530 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007531 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007532 return (NULL);
7533 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007534 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007535
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007536 /*
7537 * try to handle in this routine the most common case where no
7538 * allocation of a new string is required and where content is
7539 * pure ASCII.
7540 */
7541 limit = *in++;
7542 end = ctxt->input->end;
7543 start = in;
7544 if (in >= end) {
7545 const xmlChar *oldbase = ctxt->input->base;
7546 GROW;
7547 if (oldbase != ctxt->input->base) {
7548 long delta = ctxt->input->base - oldbase;
7549 start = start + delta;
7550 in = in + delta;
7551 }
7552 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007553 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007554 if (normalize) {
7555 /*
7556 * Skip any leading spaces
7557 */
7558 while ((in < end) && (*in != limit) &&
7559 ((*in == 0x20) || (*in == 0x9) ||
7560 (*in == 0xA) || (*in == 0xD))) {
7561 in++;
7562 start = in;
7563 if (in >= end) {
7564 const xmlChar *oldbase = ctxt->input->base;
7565 GROW;
7566 if (oldbase != ctxt->input->base) {
7567 long delta = ctxt->input->base - oldbase;
7568 start = start + delta;
7569 in = in + delta;
7570 }
7571 end = ctxt->input->end;
7572 }
7573 }
7574 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7575 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7576 if ((*in++ == 0x20) && (*in == 0x20)) break;
7577 if (in >= end) {
7578 const xmlChar *oldbase = ctxt->input->base;
7579 GROW;
7580 if (oldbase != ctxt->input->base) {
7581 long delta = ctxt->input->base - oldbase;
7582 start = start + delta;
7583 in = in + delta;
7584 }
7585 end = ctxt->input->end;
7586 }
7587 }
7588 last = in;
7589 /*
7590 * skip the trailing blanks
7591 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007592 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007593 while ((in < end) && (*in != limit) &&
7594 ((*in == 0x20) || (*in == 0x9) ||
7595 (*in == 0xA) || (*in == 0xD))) {
7596 in++;
7597 if (in >= end) {
7598 const xmlChar *oldbase = ctxt->input->base;
7599 GROW;
7600 if (oldbase != ctxt->input->base) {
7601 long delta = ctxt->input->base - oldbase;
7602 start = start + delta;
7603 in = in + delta;
7604 last = last + delta;
7605 }
7606 end = ctxt->input->end;
7607 }
7608 }
7609 if (*in != limit) goto need_complex;
7610 } else {
7611 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7612 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7613 in++;
7614 if (in >= end) {
7615 const xmlChar *oldbase = ctxt->input->base;
7616 GROW;
7617 if (oldbase != ctxt->input->base) {
7618 long delta = ctxt->input->base - oldbase;
7619 start = start + delta;
7620 in = in + delta;
7621 }
7622 end = ctxt->input->end;
7623 }
7624 }
7625 last = in;
7626 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007627 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007628 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007629 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007630 *len = last - start;
7631 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007632 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007633 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007634 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007635 }
7636 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007637 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007638 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007639need_complex:
7640 if (alloc) *alloc = 1;
7641 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007642}
7643
7644/**
7645 * xmlParseAttribute2:
7646 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007647 * @pref: the element prefix
7648 * @elem: the element name
7649 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007650 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007651 * @len: an int * to save the length of the attribute
7652 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007653 *
7654 * parse an attribute in the new SAX2 framework.
7655 *
7656 * Returns the attribute name, and the value in *value, .
7657 */
7658
7659static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007660xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7661 const xmlChar *pref, const xmlChar *elem,
7662 const xmlChar **prefix, xmlChar **value,
7663 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007665 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007666 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007667
7668 *value = NULL;
7669 GROW;
7670 name = xmlParseQName(ctxt, prefix);
7671 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007672 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7673 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007674 return(NULL);
7675 }
7676
7677 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007678 * get the type if needed
7679 */
7680 if (ctxt->attsSpecial != NULL) {
7681 int type;
7682
7683 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7684 pref, elem, *prefix, name);
7685 if (type != 0) normalize = 1;
7686 }
7687
7688 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007689 * read the value
7690 */
7691 SKIP_BLANKS;
7692 if (RAW == '=') {
7693 NEXT;
7694 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007695 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007696 ctxt->instate = XML_PARSER_CONTENT;
7697 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007698 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007699 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007700 return(NULL);
7701 }
7702
Daniel Veillardd8925572005-06-08 22:34:55 +00007703 if (*prefix == ctxt->str_xml) {
7704 /*
7705 * Check that xml:lang conforms to the specification
7706 * No more registered as an error, just generate a warning now
7707 * since this was deprecated in XML second edition
7708 */
7709 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7710 internal_val = xmlStrndup(val, *len);
7711 if (!xmlCheckLanguageID(internal_val)) {
7712 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7713 "Malformed value for xml:lang : %s\n",
7714 internal_val, NULL);
7715 }
7716 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717
Daniel Veillardd8925572005-06-08 22:34:55 +00007718 /*
7719 * Check that xml:space conforms to the specification
7720 */
7721 if (xmlStrEqual(name, BAD_CAST "space")) {
7722 internal_val = xmlStrndup(val, *len);
7723 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7724 *(ctxt->space) = 0;
7725 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7726 *(ctxt->space) = 1;
7727 else {
7728 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007730 internal_val, NULL);
7731 }
7732 }
7733 if (internal_val) {
7734 xmlFree(internal_val);
7735 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737
7738 *value = val;
7739 return(name);
7740}
7741
7742/**
7743 * xmlParseStartTag2:
7744 * @ctxt: an XML parser context
7745 *
 * Parse the start of a tag, for either the element rule or the
 * EmptyElement rule. In both cases the tag closing chars are not parsed.
7748 * This routine is called when running SAX2 parsing
7749 *
7750 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7751 *
7752 * [ WFC: Unique Att Spec ]
7753 * No attribute name may appear more than once in the same start-tag or
7754 * empty-element tag.
7755 *
7756 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7757 *
7758 * [ WFC: Unique Att Spec ]
7759 * No attribute name may appear more than once in the same start-tag or
7760 * empty-element tag.
7761 *
7762 * With namespace:
7763 *
7764 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7765 *
7766 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7767 *
7768 * Returns the element name parsed
7769 */
7770
7771static const xmlChar *
7772xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007773 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007774 const xmlChar *localname;
7775 const xmlChar *prefix;
7776 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007777 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007778 const xmlChar *nsname;
7779 xmlChar *attvalue;
7780 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007781 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007782 int nratts, nbatts, nbdef;
7783 int i, j, nbNs, attval;
7784 const xmlChar *base;
7785 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007786 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787
7788 if (RAW != '<') return(NULL);
7789 NEXT1;
7790
7791 /*
7792 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7793 * point since the attribute values may be stored as pointers to
7794 * the buffer and calling SHRINK would destroy them !
7795 * The Shrinking is only possible once the full set of attribute
7796 * callbacks have been done.
7797 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007798reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007799 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007800 base = ctxt->input->base;
7801 cur = ctxt->input->cur - ctxt->input->base;
7802 nbatts = 0;
7803 nratts = 0;
7804 nbdef = 0;
7805 nbNs = 0;
7806 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007807 /* Forget any namespaces added during an earlier parse of this element. */
7808 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007809
7810 localname = xmlParseQName(ctxt, &prefix);
7811 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007812 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7813 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007814 return(NULL);
7815 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007816 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007817
7818 /*
7819 * Now parse the attributes, it ends up with the ending
7820 *
7821 * (S Attribute)* S?
7822 */
7823 SKIP_BLANKS;
7824 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007825 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007826
7827 while ((RAW != '>') &&
7828 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007829 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007830 const xmlChar *q = CUR_PTR;
7831 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007832 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007833
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007834 attname = xmlParseAttribute2(ctxt, prefix, localname,
7835 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007836 if ((attname != NULL) && (attvalue != NULL)) {
7837 if (len < 0) len = xmlStrlen(attvalue);
7838 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007839 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7840 xmlURIPtr uri;
7841
7842 if (*URL != 0) {
7843 uri = xmlParseURI((const char *) URL);
7844 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007845 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7846 "xmlns: %s not a valid URI\n",
7847 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007848 } else {
7849 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007850 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7851 "xmlns: URI %s is not absolute\n",
7852 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007853 }
7854 xmlFreeURI(uri);
7855 }
7856 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007857 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007858 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007859 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007860 for (j = 1;j <= nbNs;j++)
7861 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7862 break;
7863 if (j <= nbNs)
7864 xmlErrAttributeDup(ctxt, NULL, attname);
7865 else
7866 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 if (alloc != 0) xmlFree(attvalue);
7868 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007869 continue;
7870 }
7871 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007872 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7873 xmlURIPtr uri;
7874
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007875 if (attname == ctxt->str_xml) {
7876 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007877 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7878 "xml namespace prefix mapped to wrong URI\n",
7879 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007880 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007881 /*
7882 * Do not keep a namespace definition node
7883 */
7884 if (alloc != 0) xmlFree(attvalue);
7885 SKIP_BLANKS;
7886 continue;
7887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007888 uri = xmlParseURI((const char *) URL);
7889 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007890 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7891 "xmlns:%s: '%s' is not a valid URI\n",
7892 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007893 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007894 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007895 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7896 "xmlns:%s: URI %s is not absolute\n",
7897 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007898 }
7899 xmlFreeURI(uri);
7900 }
7901
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007903 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007904 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007905 for (j = 1;j <= nbNs;j++)
7906 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7907 break;
7908 if (j <= nbNs)
7909 xmlErrAttributeDup(ctxt, aprefix, attname);
7910 else
7911 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007912 if (alloc != 0) xmlFree(attvalue);
7913 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007914 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007915 continue;
7916 }
7917
7918 /*
7919 * Add the pair to atts
7920 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007921 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7922 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007923 if (attvalue[len] == 0)
7924 xmlFree(attvalue);
7925 goto failed;
7926 }
7927 maxatts = ctxt->maxatts;
7928 atts = ctxt->atts;
7929 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007930 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007931 atts[nbatts++] = attname;
7932 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007933 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007934 atts[nbatts++] = attvalue;
7935 attvalue += len;
7936 atts[nbatts++] = attvalue;
7937 /*
7938 * tag if some deallocation is needed
7939 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007940 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007941 } else {
7942 if ((attvalue != NULL) && (attvalue[len] == 0))
7943 xmlFree(attvalue);
7944 }
7945
7946failed:
7947
7948 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007949 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007950 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7951 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007952 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7954 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007955 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007956 }
7957 SKIP_BLANKS;
7958 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7959 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007960 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007961 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 break;
7963 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007964 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007965 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 }
7967
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007969 * The attributes defaulting
7970 */
7971 if (ctxt->attsDefault != NULL) {
7972 xmlDefAttrsPtr defaults;
7973
7974 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7975 if (defaults != NULL) {
7976 for (i = 0;i < defaults->nbAttrs;i++) {
7977 attname = defaults->values[4 * i];
7978 aprefix = defaults->values[4 * i + 1];
7979
7980 /*
7981 * special work for namespaces defaulted defs
7982 */
7983 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7984 /*
7985 * check that it's not a defined namespace
7986 */
7987 for (j = 1;j <= nbNs;j++)
7988 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7989 break;
7990 if (j <= nbNs) continue;
7991
7992 nsname = xmlGetNamespace(ctxt, NULL);
7993 if (nsname != defaults->values[4 * i + 2]) {
7994 if (nsPush(ctxt, NULL,
7995 defaults->values[4 * i + 2]) > 0)
7996 nbNs++;
7997 }
7998 } else if (aprefix == ctxt->str_xmlns) {
7999 /*
8000 * check that it's not a defined namespace
8001 */
8002 for (j = 1;j <= nbNs;j++)
8003 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8004 break;
8005 if (j <= nbNs) continue;
8006
8007 nsname = xmlGetNamespace(ctxt, attname);
8008 if (nsname != defaults->values[2]) {
8009 if (nsPush(ctxt, attname,
8010 defaults->values[4 * i + 2]) > 0)
8011 nbNs++;
8012 }
8013 } else {
8014 /*
8015 * check that it's not a defined attribute
8016 */
8017 for (j = 0;j < nbatts;j+=5) {
8018 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8019 break;
8020 }
8021 if (j < nbatts) continue;
8022
8023 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8024 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008025 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008026 }
8027 maxatts = ctxt->maxatts;
8028 atts = ctxt->atts;
8029 }
8030 atts[nbatts++] = attname;
8031 atts[nbatts++] = aprefix;
8032 if (aprefix == NULL)
8033 atts[nbatts++] = NULL;
8034 else
8035 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8036 atts[nbatts++] = defaults->values[4 * i + 2];
8037 atts[nbatts++] = defaults->values[4 * i + 3];
8038 nbdef++;
8039 }
8040 }
8041 }
8042 }
8043
Daniel Veillarde70c8772003-11-25 07:21:18 +00008044 /*
8045 * The attributes checkings
8046 */
8047 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008048 /*
8049 * The default namespace does not apply to attribute names.
8050 */
8051 if (atts[i + 1] != NULL) {
8052 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8053 if (nsname == NULL) {
8054 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8055 "Namespace prefix %s for %s on %s is not defined\n",
8056 atts[i + 1], atts[i], localname);
8057 }
8058 atts[i + 2] = nsname;
8059 } else
8060 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008061 /*
8062 * [ WFC: Unique Att Spec ]
8063 * No attribute name may appear more than once in the same
8064 * start-tag or empty-element tag.
8065 * As extended by the Namespace in XML REC.
8066 */
8067 for (j = 0; j < i;j += 5) {
8068 if (atts[i] == atts[j]) {
8069 if (atts[i+1] == atts[j+1]) {
8070 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8071 break;
8072 }
8073 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8074 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8075 "Namespaced Attribute %s in '%s' redefined\n",
8076 atts[i], nsname, NULL);
8077 break;
8078 }
8079 }
8080 }
8081 }
8082
Daniel Veillarde57ec792003-09-10 10:50:59 +00008083 nsname = xmlGetNamespace(ctxt, prefix);
8084 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008085 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8086 "Namespace prefix %s on %s is not defined\n",
8087 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008088 }
8089 *pref = prefix;
8090 *URI = nsname;
8091
8092 /*
8093 * SAX: Start of Element !
8094 */
8095 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8096 (!ctxt->disableSAX)) {
8097 if (nbNs > 0)
8098 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8099 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8100 nbatts / 5, nbdef, atts);
8101 else
8102 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8103 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8104 }
8105
8106 /*
8107 * Free up attribute allocated strings if needed
8108 */
8109 if (attval != 0) {
8110 for (i = 3,j = 0; j < nratts;i += 5,j++)
8111 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8112 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008113 }
8114
8115 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008116
8117base_changed:
8118 /*
8119 * the attribute strings are valid iif the base didn't changed
8120 */
8121 if (attval != 0) {
8122 for (i = 3,j = 0; j < nratts;i += 5,j++)
8123 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8124 xmlFree((xmlChar *) atts[i]);
8125 }
8126 ctxt->input->cur = ctxt->input->base + cur;
8127 if (ctxt->wellFormed == 1) {
8128 goto reparse;
8129 }
8130 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008131}
8132
8133/**
8134 * xmlParseEndTag2:
8135 * @ctxt: an XML parser context
8136 * @line: line of the start tag
8137 * @nsNr: number of namespaces on the start tag
8138 *
8139 * parse an end of tag
8140 *
8141 * [42] ETag ::= '</' Name S? '>'
8142 *
8143 * With namespace
8144 *
8145 * [NS 9] ETag ::= '</' QName S? '>'
8146 */
8147
8148static void
8149xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008150 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008151 const xmlChar *name;
8152
8153 GROW;
8154 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008155 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008156 return;
8157 }
8158 SKIP(2);
8159
William M. Brack13dfa872004-09-18 04:52:08 +00008160 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008161 if (ctxt->input->cur[tlen] == '>') {
8162 ctxt->input->cur += tlen + 1;
8163 goto done;
8164 }
8165 ctxt->input->cur += tlen;
8166 name = (xmlChar*)1;
8167 } else {
8168 if (prefix == NULL)
8169 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8170 else
8171 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8172 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008173
8174 /*
8175 * We should definitely be at the ending "S? '>'" part
8176 */
8177 GROW;
8178 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008179 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008180 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181 } else
8182 NEXT1;
8183
8184 /*
8185 * [ WFC: Element Type Match ]
8186 * The Name in an element's end-tag must match the element type in the
8187 * start-tag.
8188 *
8189 */
8190 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008191 if (name == NULL) name = BAD_CAST "unparseable";
8192 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008193 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008194 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008195 }
8196
8197 /*
8198 * SAX: End of Tag
8199 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008200done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8202 (!ctxt->disableSAX))
8203 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8204
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 spacePop(ctxt);
8206 if (nsNr != 0)
8207 nsPop(ctxt, nsNr);
8208 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008209}
8210
8211/**
Owen Taylor3473f882001-02-23 17:55:21 +00008212 * xmlParseCDSect:
8213 * @ctxt: an XML parser context
8214 *
8215 * Parse escaped pure raw content.
8216 *
8217 * [18] CDSect ::= CDStart CData CDEnd
8218 *
8219 * [19] CDStart ::= '<![CDATA['
8220 *
8221 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8222 *
8223 * [21] CDEnd ::= ']]>'
8224 */
8225void
8226xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8227 xmlChar *buf = NULL;
8228 int len = 0;
8229 int size = XML_PARSER_BUFFER_SIZE;
8230 int r, rl;
8231 int s, sl;
8232 int cur, l;
8233 int count = 0;
8234
Daniel Veillard8f597c32003-10-06 08:19:27 +00008235 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008236 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008237 SKIP(9);
8238 } else
8239 return;
8240
8241 ctxt->instate = XML_PARSER_CDATA_SECTION;
8242 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008243 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008244 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008245 ctxt->instate = XML_PARSER_CONTENT;
8246 return;
8247 }
8248 NEXTL(rl);
8249 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008250 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008251 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008252 ctxt->instate = XML_PARSER_CONTENT;
8253 return;
8254 }
8255 NEXTL(sl);
8256 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008257 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008258 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008259 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 return;
8261 }
William M. Brack871611b2003-10-18 04:53:14 +00008262 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008263 ((r != ']') || (s != ']') || (cur != '>'))) {
8264 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008265 xmlChar *tmp;
8266
Owen Taylor3473f882001-02-23 17:55:21 +00008267 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008268 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8269 if (tmp == NULL) {
8270 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008271 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008272 return;
8273 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008274 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008275 }
8276 COPY_BUF(rl,buf,len,r);
8277 r = s;
8278 rl = sl;
8279 s = cur;
8280 sl = l;
8281 count++;
8282 if (count > 50) {
8283 GROW;
8284 count = 0;
8285 }
8286 NEXTL(l);
8287 cur = CUR_CHAR(l);
8288 }
8289 buf[len] = 0;
8290 ctxt->instate = XML_PARSER_CONTENT;
8291 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008292 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008293 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008294 xmlFree(buf);
8295 return;
8296 }
8297 NEXTL(l);
8298
8299 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008300 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008301 */
8302 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8303 if (ctxt->sax->cdataBlock != NULL)
8304 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008305 else if (ctxt->sax->characters != NULL)
8306 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008307 }
8308 xmlFree(buf);
8309}
8310
8311/**
8312 * xmlParseContent:
8313 * @ctxt: an XML parser context
8314 *
8315 * Parse a content:
8316 *
8317 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8318 */
8319
8320void
8321xmlParseContent(xmlParserCtxtPtr ctxt) {
8322 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008323 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008324 ((RAW != '<') || (NXT(1) != '/'))) {
8325 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008326 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008327 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008328
8329 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008330 * First case : a Processing Instruction.
8331 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008332 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008333 xmlParsePI(ctxt);
8334 }
8335
8336 /*
8337 * Second case : a CDSection
8338 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008339 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008340 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008341 xmlParseCDSect(ctxt);
8342 }
8343
8344 /*
8345 * Third case : a comment
8346 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008347 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008348 (NXT(2) == '-') && (NXT(3) == '-')) {
8349 xmlParseComment(ctxt);
8350 ctxt->instate = XML_PARSER_CONTENT;
8351 }
8352
8353 /*
8354 * Fourth case : a sub-element.
8355 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008356 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008357 xmlParseElement(ctxt);
8358 }
8359
8360 /*
8361 * Fifth case : a reference. If if has not been resolved,
8362 * parsing returns it's Name, create the node
8363 */
8364
Daniel Veillard21a0f912001-02-25 19:54:14 +00008365 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008366 xmlParseReference(ctxt);
8367 }
8368
8369 /*
8370 * Last case, text. Note that References are handled directly.
8371 */
8372 else {
8373 xmlParseCharData(ctxt, 0);
8374 }
8375
8376 GROW;
8377 /*
8378 * Pop-up of finished entities.
8379 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008380 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008381 xmlPopInput(ctxt);
8382 SHRINK;
8383
Daniel Veillardfdc91562002-07-01 21:52:03 +00008384 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008385 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8386 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008387 ctxt->instate = XML_PARSER_EOF;
8388 break;
8389 }
8390 }
8391}
8392
8393/**
8394 * xmlParseElement:
8395 * @ctxt: an XML parser context
8396 *
8397 * parse an XML element, this is highly recursive
8398 *
8399 * [39] element ::= EmptyElemTag | STag content ETag
8400 *
8401 * [ WFC: Element Type Match ]
8402 * The Name in an element's end-tag must match the element type in the
8403 * start-tag.
8404 *
Owen Taylor3473f882001-02-23 17:55:21 +00008405 */
8406
8407void
8408xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008409 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008410 const xmlChar *prefix;
8411 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008412 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008413 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008414 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008415 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008416
8417 /* Capture start position */
8418 if (ctxt->record_info) {
8419 node_info.begin_pos = ctxt->input->consumed +
8420 (CUR_PTR - ctxt->input->base);
8421 node_info.begin_line = ctxt->input->line;
8422 }
8423
8424 if (ctxt->spaceNr == 0)
8425 spacePush(ctxt, -1);
8426 else
8427 spacePush(ctxt, *ctxt->space);
8428
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008429 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008430#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008431 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008432#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008433 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008434#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008435 else
8436 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008437#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008438 if (name == NULL) {
8439 spacePop(ctxt);
8440 return;
8441 }
8442 namePush(ctxt, name);
8443 ret = ctxt->node;
8444
Daniel Veillard4432df22003-09-28 18:58:27 +00008445#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008446 /*
8447 * [ VC: Root Element Type ]
8448 * The Name in the document type declaration must match the element
8449 * type of the root element.
8450 */
8451 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8452 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8453 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008454#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008455
8456 /*
8457 * Check for an Empty Element.
8458 */
8459 if ((RAW == '/') && (NXT(1) == '>')) {
8460 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008461 if (ctxt->sax2) {
8462 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8463 (!ctxt->disableSAX))
8464 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008465#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008466 } else {
8467 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8468 (!ctxt->disableSAX))
8469 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008470#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008471 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008472 namePop(ctxt);
8473 spacePop(ctxt);
8474 if (nsNr != ctxt->nsNr)
8475 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008476 if ( ret != NULL && ctxt->record_info ) {
8477 node_info.end_pos = ctxt->input->consumed +
8478 (CUR_PTR - ctxt->input->base);
8479 node_info.end_line = ctxt->input->line;
8480 node_info.node = ret;
8481 xmlParserAddNodeInfo(ctxt, &node_info);
8482 }
8483 return;
8484 }
8485 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008486 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008487 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008488 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8489 "Couldn't find end of Start Tag %s line %d\n",
8490 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008491
8492 /*
8493 * end of parsing of this node.
8494 */
8495 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008496 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008497 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008498 if (nsNr != ctxt->nsNr)
8499 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008500
8501 /*
8502 * Capture end position and add node
8503 */
8504 if ( ret != NULL && ctxt->record_info ) {
8505 node_info.end_pos = ctxt->input->consumed +
8506 (CUR_PTR - ctxt->input->base);
8507 node_info.end_line = ctxt->input->line;
8508 node_info.node = ret;
8509 xmlParserAddNodeInfo(ctxt, &node_info);
8510 }
8511 return;
8512 }
8513
8514 /*
8515 * Parse the content of the element:
8516 */
8517 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008518 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008519 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008520 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008521 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008522
8523 /*
8524 * end of parsing of this node.
8525 */
8526 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008527 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008528 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529 if (nsNr != ctxt->nsNr)
8530 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008531 return;
8532 }
8533
8534 /*
8535 * parse the end of tag: '</' should be here.
8536 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008537 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008538 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008539 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008540 }
8541#ifdef LIBXML_SAX1_ENABLED
8542 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008544#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008545
8546 /*
8547 * Capture end position and add node
8548 */
8549 if ( ret != NULL && ctxt->record_info ) {
8550 node_info.end_pos = ctxt->input->consumed +
8551 (CUR_PTR - ctxt->input->base);
8552 node_info.end_line = ctxt->input->line;
8553 node_info.node = ret;
8554 xmlParserAddNodeInfo(ctxt, &node_info);
8555 }
8556}
8557
8558/**
8559 * xmlParseVersionNum:
8560 * @ctxt: an XML parser context
8561 *
8562 * parse the XML version value.
8563 *
8564 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8565 *
8566 * Returns the string giving the XML version number, or NULL
8567 */
8568xmlChar *
8569xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8570 xmlChar *buf = NULL;
8571 int len = 0;
8572 int size = 10;
8573 xmlChar cur;
8574
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008575 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008576 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008577 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008578 return(NULL);
8579 }
8580 cur = CUR;
8581 while (((cur >= 'a') && (cur <= 'z')) ||
8582 ((cur >= 'A') && (cur <= 'Z')) ||
8583 ((cur >= '0') && (cur <= '9')) ||
8584 (cur == '_') || (cur == '.') ||
8585 (cur == ':') || (cur == '-')) {
8586 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008587 xmlChar *tmp;
8588
Owen Taylor3473f882001-02-23 17:55:21 +00008589 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008590 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8591 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008592 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008593 return(NULL);
8594 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008595 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008596 }
8597 buf[len++] = cur;
8598 NEXT;
8599 cur=CUR;
8600 }
8601 buf[len] = 0;
8602 return(buf);
8603}
8604
8605/**
8606 * xmlParseVersionInfo:
8607 * @ctxt: an XML parser context
8608 *
8609 * parse the XML version.
8610 *
8611 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8612 *
8613 * [25] Eq ::= S? '=' S?
8614 *
8615 * Returns the version string, e.g. "1.0"
8616 */
8617
8618xmlChar *
8619xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8620 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008621
Daniel Veillarda07050d2003-10-19 14:46:32 +00008622 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008623 SKIP(7);
8624 SKIP_BLANKS;
8625 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008626 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 return(NULL);
8628 }
8629 NEXT;
8630 SKIP_BLANKS;
8631 if (RAW == '"') {
8632 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008633 version = xmlParseVersionNum(ctxt);
8634 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008635 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008636 } else
8637 NEXT;
8638 } else if (RAW == '\''){
8639 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008640 version = xmlParseVersionNum(ctxt);
8641 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008642 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008643 } else
8644 NEXT;
8645 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008646 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008647 }
8648 }
8649 return(version);
8650}
8651
8652/**
8653 * xmlParseEncName:
8654 * @ctxt: an XML parser context
8655 *
8656 * parse the XML encoding name
8657 *
8658 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8659 *
8660 * Returns the encoding name value or NULL
8661 */
8662xmlChar *
8663xmlParseEncName(xmlParserCtxtPtr ctxt) {
8664 xmlChar *buf = NULL;
8665 int len = 0;
8666 int size = 10;
8667 xmlChar cur;
8668
8669 cur = CUR;
8670 if (((cur >= 'a') && (cur <= 'z')) ||
8671 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008672 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008673 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008674 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008675 return(NULL);
8676 }
8677
8678 buf[len++] = cur;
8679 NEXT;
8680 cur = CUR;
8681 while (((cur >= 'a') && (cur <= 'z')) ||
8682 ((cur >= 'A') && (cur <= 'Z')) ||
8683 ((cur >= '0') && (cur <= '9')) ||
8684 (cur == '.') || (cur == '_') ||
8685 (cur == '-')) {
8686 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008687 xmlChar *tmp;
8688
Owen Taylor3473f882001-02-23 17:55:21 +00008689 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008690 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8691 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008692 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008693 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008694 return(NULL);
8695 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008696 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008697 }
8698 buf[len++] = cur;
8699 NEXT;
8700 cur = CUR;
8701 if (cur == 0) {
8702 SHRINK;
8703 GROW;
8704 cur = CUR;
8705 }
8706 }
8707 buf[len] = 0;
8708 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008709 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008710 }
8711 return(buf);
8712}
8713
8714/**
8715 * xmlParseEncodingDecl:
8716 * @ctxt: an XML parser context
8717 *
8718 * parse the XML encoding declaration
8719 *
8720 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8721 *
8722 * this setups the conversion filters.
8723 *
8724 * Returns the encoding value or NULL
8725 */
8726
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008727const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008728xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8729 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008730
8731 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008732 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008733 SKIP(8);
8734 SKIP_BLANKS;
8735 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008736 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008737 return(NULL);
8738 }
8739 NEXT;
8740 SKIP_BLANKS;
8741 if (RAW == '"') {
8742 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008743 encoding = xmlParseEncName(ctxt);
8744 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008745 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008746 } else
8747 NEXT;
8748 } else if (RAW == '\''){
8749 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008750 encoding = xmlParseEncName(ctxt);
8751 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008752 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008753 } else
8754 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008755 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008756 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008757 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008758 /*
8759 * UTF-16 encoding stwich has already taken place at this stage,
8760 * more over the little-endian/big-endian selection is already done
8761 */
8762 if ((encoding != NULL) &&
8763 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8764 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008765 if (ctxt->encoding != NULL)
8766 xmlFree((xmlChar *) ctxt->encoding);
8767 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008768 }
8769 /*
8770 * UTF-8 encoding is handled natively
8771 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008772 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008773 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8774 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008775 if (ctxt->encoding != NULL)
8776 xmlFree((xmlChar *) ctxt->encoding);
8777 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008778 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008779 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008780 xmlCharEncodingHandlerPtr handler;
8781
8782 if (ctxt->input->encoding != NULL)
8783 xmlFree((xmlChar *) ctxt->input->encoding);
8784 ctxt->input->encoding = encoding;
8785
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008786 handler = xmlFindCharEncodingHandler((const char *) encoding);
8787 if (handler != NULL) {
8788 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008789 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008790 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008791 "Unsupported encoding %s\n", encoding);
8792 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008793 }
8794 }
8795 }
8796 return(encoding);
8797}
8798
8799/**
8800 * xmlParseSDDecl:
8801 * @ctxt: an XML parser context
8802 *
8803 * parse the XML standalone declaration
8804 *
8805 * [32] SDDecl ::= S 'standalone' Eq
8806 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8807 *
8808 * [ VC: Standalone Document Declaration ]
8809 * TODO The standalone document declaration must have the value "no"
8810 * if any external markup declarations contain declarations of:
8811 * - attributes with default values, if elements to which these
8812 * attributes apply appear in the document without specifications
8813 * of values for these attributes, or
8814 * - entities (other than amp, lt, gt, apos, quot), if references
8815 * to those entities appear in the document, or
8816 * - attributes with values subject to normalization, where the
8817 * attribute appears in the document with a value which will change
8818 * as a result of normalization, or
8819 * - element types with element content, if white space occurs directly
8820 * within any instance of those types.
8821 *
8822 * Returns 1 if standalone, 0 otherwise
8823 */
8824
8825int
8826xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8827 int standalone = -1;
8828
8829 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008830 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008831 SKIP(10);
8832 SKIP_BLANKS;
8833 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008834 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008835 return(standalone);
8836 }
8837 NEXT;
8838 SKIP_BLANKS;
8839 if (RAW == '\''){
8840 NEXT;
8841 if ((RAW == 'n') && (NXT(1) == 'o')) {
8842 standalone = 0;
8843 SKIP(2);
8844 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8845 (NXT(2) == 's')) {
8846 standalone = 1;
8847 SKIP(3);
8848 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008849 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008850 }
8851 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008852 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008853 } else
8854 NEXT;
8855 } else if (RAW == '"'){
8856 NEXT;
8857 if ((RAW == 'n') && (NXT(1) == 'o')) {
8858 standalone = 0;
8859 SKIP(2);
8860 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8861 (NXT(2) == 's')) {
8862 standalone = 1;
8863 SKIP(3);
8864 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008865 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008866 }
8867 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008868 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008869 } else
8870 NEXT;
8871 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008872 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008873 }
8874 }
8875 return(standalone);
8876}
8877
8878/**
8879 * xmlParseXMLDecl:
8880 * @ctxt: an XML parser context
8881 *
8882 * parse an XML declaration header
8883 *
8884 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8885 */
8886
8887void
8888xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8889 xmlChar *version;
8890
8891 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008892 * This value for standalone indicates that the document has an
8893 * XML declaration but it does not have a standalone attribute.
8894 * It will be overwritten later if a standalone attribute is found.
8895 */
8896 ctxt->input->standalone = -2;
8897
8898 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008899 * We know that '<?xml' is here.
8900 */
8901 SKIP(5);
8902
William M. Brack76e95df2003-10-18 16:20:14 +00008903 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008904 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8905 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008906 }
8907 SKIP_BLANKS;
8908
8909 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008910 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008911 */
8912 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008913 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008914 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008915 } else {
8916 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8917 /*
8918 * TODO: Blueberry should be detected here
8919 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008920 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8921 "Unsupported version '%s'\n",
8922 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008923 }
8924 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008925 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008926 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008927 }
Owen Taylor3473f882001-02-23 17:55:21 +00008928
8929 /*
8930 * We may have the encoding declaration
8931 */
William M. Brack76e95df2003-10-18 16:20:14 +00008932 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008933 if ((RAW == '?') && (NXT(1) == '>')) {
8934 SKIP(2);
8935 return;
8936 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008938 }
8939 xmlParseEncodingDecl(ctxt);
8940 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8941 /*
8942 * The XML REC instructs us to stop parsing right here
8943 */
8944 return;
8945 }
8946
8947 /*
8948 * We may have the standalone status.
8949 */
William M. Brack76e95df2003-10-18 16:20:14 +00008950 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008951 if ((RAW == '?') && (NXT(1) == '>')) {
8952 SKIP(2);
8953 return;
8954 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008955 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008956 }
8957 SKIP_BLANKS;
8958 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8959
8960 SKIP_BLANKS;
8961 if ((RAW == '?') && (NXT(1) == '>')) {
8962 SKIP(2);
8963 } else if (RAW == '>') {
8964 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008965 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008966 NEXT;
8967 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008968 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008969 MOVETO_ENDTAG(CUR_PTR);
8970 NEXT;
8971 }
8972}
8973
8974/**
8975 * xmlParseMisc:
8976 * @ctxt: an XML parser context
8977 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008978 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008979 *
8980 * [27] Misc ::= Comment | PI | S
8981 */
8982
8983void
8984xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008985 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008986 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008987 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008988 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008989 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008990 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008991 NEXT;
8992 } else
8993 xmlParseComment(ctxt);
8994 }
8995}
8996
8997/**
8998 * xmlParseDocument:
8999 * @ctxt: an XML parser context
9000 *
9001 * parse an XML document (and build a tree if using the standard SAX
9002 * interface).
9003 *
9004 * [1] document ::= prolog element Misc*
9005 *
9006 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9007 *
9008 * Returns 0, -1 in case of error. the parser context is augmented
9009 * as a result of the parsing.
9010 */
9011
9012int
9013xmlParseDocument(xmlParserCtxtPtr ctxt) {
9014 xmlChar start[4];
9015 xmlCharEncoding enc;
9016
9017 xmlInitParser();
9018
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009019 if ((ctxt == NULL) || (ctxt->input == NULL))
9020 return(-1);
9021
Owen Taylor3473f882001-02-23 17:55:21 +00009022 GROW;
9023
9024 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009025 * SAX: detecting the level.
9026 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009027 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009028
9029 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009030 * SAX: beginning of the document processing.
9031 */
9032 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9033 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9034
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009035 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9036 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009037 /*
9038 * Get the 4 first bytes and decode the charset
9039 * if enc != XML_CHAR_ENCODING_NONE
9040 * plug some encoding conversion routines.
9041 */
9042 start[0] = RAW;
9043 start[1] = NXT(1);
9044 start[2] = NXT(2);
9045 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009046 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009047 if (enc != XML_CHAR_ENCODING_NONE) {
9048 xmlSwitchEncoding(ctxt, enc);
9049 }
Owen Taylor3473f882001-02-23 17:55:21 +00009050 }
9051
9052
9053 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009054 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009055 }
9056
9057 /*
9058 * Check for the XMLDecl in the Prolog.
9059 */
9060 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009061 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009062
9063 /*
9064 * Note that we will switch encoding on the fly.
9065 */
9066 xmlParseXMLDecl(ctxt);
9067 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9068 /*
9069 * The XML REC instructs us to stop parsing right here
9070 */
9071 return(-1);
9072 }
9073 ctxt->standalone = ctxt->input->standalone;
9074 SKIP_BLANKS;
9075 } else {
9076 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9077 }
9078 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9079 ctxt->sax->startDocument(ctxt->userData);
9080
9081 /*
9082 * The Misc part of the Prolog
9083 */
9084 GROW;
9085 xmlParseMisc(ctxt);
9086
9087 /*
9088 * Then possibly doc type declaration(s) and more Misc
9089 * (doctypedecl Misc*)?
9090 */
9091 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009092 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009093
9094 ctxt->inSubset = 1;
9095 xmlParseDocTypeDecl(ctxt);
9096 if (RAW == '[') {
9097 ctxt->instate = XML_PARSER_DTD;
9098 xmlParseInternalSubset(ctxt);
9099 }
9100
9101 /*
9102 * Create and update the external subset.
9103 */
9104 ctxt->inSubset = 2;
9105 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9106 (!ctxt->disableSAX))
9107 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9108 ctxt->extSubSystem, ctxt->extSubURI);
9109 ctxt->inSubset = 0;
9110
9111
9112 ctxt->instate = XML_PARSER_PROLOG;
9113 xmlParseMisc(ctxt);
9114 }
9115
9116 /*
9117 * Time to start parsing the tree itself
9118 */
9119 GROW;
9120 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009121 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9122 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009123 } else {
9124 ctxt->instate = XML_PARSER_CONTENT;
9125 xmlParseElement(ctxt);
9126 ctxt->instate = XML_PARSER_EPILOG;
9127
9128
9129 /*
9130 * The Misc part at the end
9131 */
9132 xmlParseMisc(ctxt);
9133
Daniel Veillard561b7f82002-03-20 21:55:57 +00009134 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009135 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009136 }
9137 ctxt->instate = XML_PARSER_EOF;
9138 }
9139
9140 /*
9141 * SAX: end of the document processing.
9142 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009143 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009144 ctxt->sax->endDocument(ctxt->userData);
9145
Daniel Veillard5997aca2002-03-18 18:36:20 +00009146 /*
9147 * Remove locally kept entity definitions if the tree was not built
9148 */
9149 if ((ctxt->myDoc != NULL) &&
9150 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9151 xmlFreeDoc(ctxt->myDoc);
9152 ctxt->myDoc = NULL;
9153 }
9154
Daniel Veillardc7612992002-02-17 22:47:37 +00009155 if (! ctxt->wellFormed) {
9156 ctxt->valid = 0;
9157 return(-1);
9158 }
Owen Taylor3473f882001-02-23 17:55:21 +00009159 return(0);
9160}
9161
9162/**
9163 * xmlParseExtParsedEnt:
9164 * @ctxt: an XML parser context
9165 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009166 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009167 * An external general parsed entity is well-formed if it matches the
9168 * production labeled extParsedEnt.
9169 *
9170 * [78] extParsedEnt ::= TextDecl? content
9171 *
9172 * Returns 0, -1 in case of error. the parser context is augmented
9173 * as a result of the parsing.
9174 */
9175
9176int
9177xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9178 xmlChar start[4];
9179 xmlCharEncoding enc;
9180
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009181 if ((ctxt == NULL) || (ctxt->input == NULL))
9182 return(-1);
9183
Owen Taylor3473f882001-02-23 17:55:21 +00009184 xmlDefaultSAXHandlerInit();
9185
Daniel Veillard309f81d2003-09-23 09:02:53 +00009186 xmlDetectSAX2(ctxt);
9187
Owen Taylor3473f882001-02-23 17:55:21 +00009188 GROW;
9189
9190 /*
9191 * SAX: beginning of the document processing.
9192 */
9193 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9194 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9195
9196 /*
9197 * Get the 4 first bytes and decode the charset
9198 * if enc != XML_CHAR_ENCODING_NONE
9199 * plug some encoding conversion routines.
9200 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009201 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9202 start[0] = RAW;
9203 start[1] = NXT(1);
9204 start[2] = NXT(2);
9205 start[3] = NXT(3);
9206 enc = xmlDetectCharEncoding(start, 4);
9207 if (enc != XML_CHAR_ENCODING_NONE) {
9208 xmlSwitchEncoding(ctxt, enc);
9209 }
Owen Taylor3473f882001-02-23 17:55:21 +00009210 }
9211
9212
9213 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009214 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009215 }
9216
9217 /*
9218 * Check for the XMLDecl in the Prolog.
9219 */
9220 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009221 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009222
9223 /*
9224 * Note that we will switch encoding on the fly.
9225 */
9226 xmlParseXMLDecl(ctxt);
9227 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9228 /*
9229 * The XML REC instructs us to stop parsing right here
9230 */
9231 return(-1);
9232 }
9233 SKIP_BLANKS;
9234 } else {
9235 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9236 }
9237 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9238 ctxt->sax->startDocument(ctxt->userData);
9239
9240 /*
9241 * Doing validity checking on chunk doesn't make sense
9242 */
9243 ctxt->instate = XML_PARSER_CONTENT;
9244 ctxt->validate = 0;
9245 ctxt->loadsubset = 0;
9246 ctxt->depth = 0;
9247
9248 xmlParseContent(ctxt);
9249
9250 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009251 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009252 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009253 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009254 }
9255
9256 /*
9257 * SAX: end of the document processing.
9258 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009259 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009260 ctxt->sax->endDocument(ctxt->userData);
9261
9262 if (! ctxt->wellFormed) return(-1);
9263 return(0);
9264}
9265
Daniel Veillard73b013f2003-09-30 12:36:01 +00009266#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009267/************************************************************************
9268 * *
9269 * Progressive parsing interfaces *
9270 * *
9271 ************************************************************************/
9272
9273/**
9274 * xmlParseLookupSequence:
9275 * @ctxt: an XML parser context
9276 * @first: the first char to lookup
9277 * @next: the next char to lookup or zero
9278 * @third: the next char to lookup or zero
9279 *
9280 * Try to find if a sequence (first, next, third) or just (first next) or
9281 * (first) is available in the input stream.
9282 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9283 * to avoid rescanning sequences of bytes, it DOES change the state of the
9284 * parser, do not use liberally.
9285 *
9286 * Returns the index to the current parsing point if the full sequence
9287 * is available, -1 otherwise.
9288 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009289static int
Owen Taylor3473f882001-02-23 17:55:21 +00009290xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9291 xmlChar next, xmlChar third) {
9292 int base, len;
9293 xmlParserInputPtr in;
9294 const xmlChar *buf;
9295
9296 in = ctxt->input;
9297 if (in == NULL) return(-1);
9298 base = in->cur - in->base;
9299 if (base < 0) return(-1);
9300 if (ctxt->checkIndex > base)
9301 base = ctxt->checkIndex;
9302 if (in->buf == NULL) {
9303 buf = in->base;
9304 len = in->length;
9305 } else {
9306 buf = in->buf->buffer->content;
9307 len = in->buf->buffer->use;
9308 }
9309 /* take into account the sequence length */
9310 if (third) len -= 2;
9311 else if (next) len --;
9312 for (;base < len;base++) {
9313 if (buf[base] == first) {
9314 if (third != 0) {
9315 if ((buf[base + 1] != next) ||
9316 (buf[base + 2] != third)) continue;
9317 } else if (next != 0) {
9318 if (buf[base + 1] != next) continue;
9319 }
9320 ctxt->checkIndex = 0;
9321#ifdef DEBUG_PUSH
9322 if (next == 0)
9323 xmlGenericError(xmlGenericErrorContext,
9324 "PP: lookup '%c' found at %d\n",
9325 first, base);
9326 else if (third == 0)
9327 xmlGenericError(xmlGenericErrorContext,
9328 "PP: lookup '%c%c' found at %d\n",
9329 first, next, base);
9330 else
9331 xmlGenericError(xmlGenericErrorContext,
9332 "PP: lookup '%c%c%c' found at %d\n",
9333 first, next, third, base);
9334#endif
9335 return(base - (in->cur - in->base));
9336 }
9337 }
9338 ctxt->checkIndex = base;
9339#ifdef DEBUG_PUSH
9340 if (next == 0)
9341 xmlGenericError(xmlGenericErrorContext,
9342 "PP: lookup '%c' failed\n", first);
9343 else if (third == 0)
9344 xmlGenericError(xmlGenericErrorContext,
9345 "PP: lookup '%c%c' failed\n", first, next);
9346 else
9347 xmlGenericError(xmlGenericErrorContext,
9348 "PP: lookup '%c%c%c' failed\n", first, next, third);
9349#endif
9350 return(-1);
9351}
9352
9353/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009354 * xmlParseGetLasts:
9355 * @ctxt: an XML parser context
9356 * @lastlt: pointer to store the last '<' from the input
9357 * @lastgt: pointer to store the last '>' from the input
9358 *
9359 * Lookup the last < and > in the current chunk
9360 */
9361static void
9362xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9363 const xmlChar **lastgt) {
9364 const xmlChar *tmp;
9365
9366 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9367 xmlGenericError(xmlGenericErrorContext,
9368 "Internal error: xmlParseGetLasts\n");
9369 return;
9370 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009371 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009372 tmp = ctxt->input->end;
9373 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009374 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009375 if (tmp < ctxt->input->base) {
9376 *lastlt = NULL;
9377 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009378 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009379 *lastlt = tmp;
9380 tmp++;
9381 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9382 if (*tmp == '\'') {
9383 tmp++;
9384 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9385 if (tmp < ctxt->input->end) tmp++;
9386 } else if (*tmp == '"') {
9387 tmp++;
9388 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9389 if (tmp < ctxt->input->end) tmp++;
9390 } else
9391 tmp++;
9392 }
9393 if (tmp < ctxt->input->end)
9394 *lastgt = tmp;
9395 else {
9396 tmp = *lastlt;
9397 tmp--;
9398 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9399 if (tmp >= ctxt->input->base)
9400 *lastgt = tmp;
9401 else
9402 *lastgt = NULL;
9403 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009404 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009405 } else {
9406 *lastlt = NULL;
9407 *lastgt = NULL;
9408 }
9409}
9410/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009411 * xmlCheckCdataPush:
9412 * @cur: pointer to the bock of characters
9413 * @len: length of the block in bytes
9414 *
9415 * Check that the block of characters is okay as SCdata content [20]
9416 *
9417 * Returns the number of bytes to pass if okay, a negative index where an
9418 * UTF-8 error occured otherwise
9419 */
9420static int
9421xmlCheckCdataPush(const xmlChar *utf, int len) {
9422 int ix;
9423 unsigned char c;
9424 int codepoint;
9425
9426 if ((utf == NULL) || (len <= 0))
9427 return(0);
9428
9429 for (ix = 0; ix < len;) { /* string is 0-terminated */
9430 c = utf[ix];
9431 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9432 if (c >= 0x20)
9433 ix++;
9434 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9435 ix++;
9436 else
9437 return(-ix);
9438 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9439 if (ix + 2 > len) return(ix);
9440 if ((utf[ix+1] & 0xc0 ) != 0x80)
9441 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009442 codepoint = (utf[ix] & 0x1f) << 6;
9443 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009444 if (!xmlIsCharQ(codepoint))
9445 return(-ix);
9446 ix += 2;
9447 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9448 if (ix + 3 > len) return(ix);
9449 if (((utf[ix+1] & 0xc0) != 0x80) ||
9450 ((utf[ix+2] & 0xc0) != 0x80))
9451 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009452 codepoint = (utf[ix] & 0xf) << 12;
9453 codepoint |= (utf[ix+1] & 0x3f) << 6;
9454 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009455 if (!xmlIsCharQ(codepoint))
9456 return(-ix);
9457 ix += 3;
9458 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9459 if (ix + 4 > len) return(ix);
9460 if (((utf[ix+1] & 0xc0) != 0x80) ||
9461 ((utf[ix+2] & 0xc0) != 0x80) ||
9462 ((utf[ix+3] & 0xc0) != 0x80))
9463 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009464 codepoint = (utf[ix] & 0x7) << 18;
9465 codepoint |= (utf[ix+1] & 0x3f) << 12;
9466 codepoint |= (utf[ix+2] & 0x3f) << 6;
9467 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009468 if (!xmlIsCharQ(codepoint))
9469 return(-ix);
9470 ix += 4;
9471 } else /* unknown encoding */
9472 return(-ix);
9473 }
9474 return(ix);
9475}
9476
9477/**
Owen Taylor3473f882001-02-23 17:55:21 +00009478 * xmlParseTryOrFinish:
9479 * @ctxt: an XML parser context
9480 * @terminate: last chunk indicator
9481 *
9482 * Try to progress on parsing
9483 *
9484 * Returns zero if no parsing was possible
9485 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009486static int
Owen Taylor3473f882001-02-23 17:55:21 +00009487xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9488 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009489 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009490 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009491 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009492
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009493 if (ctxt->input == NULL)
9494 return(0);
9495
Owen Taylor3473f882001-02-23 17:55:21 +00009496#ifdef DEBUG_PUSH
9497 switch (ctxt->instate) {
9498 case XML_PARSER_EOF:
9499 xmlGenericError(xmlGenericErrorContext,
9500 "PP: try EOF\n"); break;
9501 case XML_PARSER_START:
9502 xmlGenericError(xmlGenericErrorContext,
9503 "PP: try START\n"); break;
9504 case XML_PARSER_MISC:
9505 xmlGenericError(xmlGenericErrorContext,
9506 "PP: try MISC\n");break;
9507 case XML_PARSER_COMMENT:
9508 xmlGenericError(xmlGenericErrorContext,
9509 "PP: try COMMENT\n");break;
9510 case XML_PARSER_PROLOG:
9511 xmlGenericError(xmlGenericErrorContext,
9512 "PP: try PROLOG\n");break;
9513 case XML_PARSER_START_TAG:
9514 xmlGenericError(xmlGenericErrorContext,
9515 "PP: try START_TAG\n");break;
9516 case XML_PARSER_CONTENT:
9517 xmlGenericError(xmlGenericErrorContext,
9518 "PP: try CONTENT\n");break;
9519 case XML_PARSER_CDATA_SECTION:
9520 xmlGenericError(xmlGenericErrorContext,
9521 "PP: try CDATA_SECTION\n");break;
9522 case XML_PARSER_END_TAG:
9523 xmlGenericError(xmlGenericErrorContext,
9524 "PP: try END_TAG\n");break;
9525 case XML_PARSER_ENTITY_DECL:
9526 xmlGenericError(xmlGenericErrorContext,
9527 "PP: try ENTITY_DECL\n");break;
9528 case XML_PARSER_ENTITY_VALUE:
9529 xmlGenericError(xmlGenericErrorContext,
9530 "PP: try ENTITY_VALUE\n");break;
9531 case XML_PARSER_ATTRIBUTE_VALUE:
9532 xmlGenericError(xmlGenericErrorContext,
9533 "PP: try ATTRIBUTE_VALUE\n");break;
9534 case XML_PARSER_DTD:
9535 xmlGenericError(xmlGenericErrorContext,
9536 "PP: try DTD\n");break;
9537 case XML_PARSER_EPILOG:
9538 xmlGenericError(xmlGenericErrorContext,
9539 "PP: try EPILOG\n");break;
9540 case XML_PARSER_PI:
9541 xmlGenericError(xmlGenericErrorContext,
9542 "PP: try PI\n");break;
9543 case XML_PARSER_IGNORE:
9544 xmlGenericError(xmlGenericErrorContext,
9545 "PP: try IGNORE\n");break;
9546 }
9547#endif
9548
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009549 if ((ctxt->input != NULL) &&
9550 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009551 xmlSHRINK(ctxt);
9552 ctxt->checkIndex = 0;
9553 }
9554 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009555
Daniel Veillarda880b122003-04-21 21:36:41 +00009556 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009557 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009558 return(0);
9559
9560
Owen Taylor3473f882001-02-23 17:55:21 +00009561 /*
9562 * Pop-up of finished entities.
9563 */
9564 while ((RAW == 0) && (ctxt->inputNr > 1))
9565 xmlPopInput(ctxt);
9566
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009567 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009568 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009569 avail = ctxt->input->length -
9570 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009571 else {
9572 /*
9573 * If we are operating on converted input, try to flush
9574 * remainng chars to avoid them stalling in the non-converted
9575 * buffer.
9576 */
9577 if ((ctxt->input->buf->raw != NULL) &&
9578 (ctxt->input->buf->raw->use > 0)) {
9579 int base = ctxt->input->base -
9580 ctxt->input->buf->buffer->content;
9581 int current = ctxt->input->cur - ctxt->input->base;
9582
9583 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9584 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9585 ctxt->input->cur = ctxt->input->base + current;
9586 ctxt->input->end =
9587 &ctxt->input->buf->buffer->content[
9588 ctxt->input->buf->buffer->use];
9589 }
9590 avail = ctxt->input->buf->buffer->use -
9591 (ctxt->input->cur - ctxt->input->base);
9592 }
Owen Taylor3473f882001-02-23 17:55:21 +00009593 if (avail < 1)
9594 goto done;
9595 switch (ctxt->instate) {
9596 case XML_PARSER_EOF:
9597 /*
9598 * Document parsing is done !
9599 */
9600 goto done;
9601 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009602 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9603 xmlChar start[4];
9604 xmlCharEncoding enc;
9605
9606 /*
9607 * Very first chars read from the document flow.
9608 */
9609 if (avail < 4)
9610 goto done;
9611
9612 /*
9613 * Get the 4 first bytes and decode the charset
9614 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009615 * plug some encoding conversion routines,
9616 * else xmlSwitchEncoding will set to (default)
9617 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009618 */
9619 start[0] = RAW;
9620 start[1] = NXT(1);
9621 start[2] = NXT(2);
9622 start[3] = NXT(3);
9623 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009624 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009625 break;
9626 }
Owen Taylor3473f882001-02-23 17:55:21 +00009627
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009628 if (avail < 2)
9629 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009630 cur = ctxt->input->cur[0];
9631 next = ctxt->input->cur[1];
9632 if (cur == 0) {
9633 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9634 ctxt->sax->setDocumentLocator(ctxt->userData,
9635 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009636 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009637 ctxt->instate = XML_PARSER_EOF;
9638#ifdef DEBUG_PUSH
9639 xmlGenericError(xmlGenericErrorContext,
9640 "PP: entering EOF\n");
9641#endif
9642 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9643 ctxt->sax->endDocument(ctxt->userData);
9644 goto done;
9645 }
9646 if ((cur == '<') && (next == '?')) {
9647 /* PI or XML decl */
9648 if (avail < 5) return(ret);
9649 if ((!terminate) &&
9650 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9651 return(ret);
9652 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9653 ctxt->sax->setDocumentLocator(ctxt->userData,
9654 &xmlDefaultSAXLocator);
9655 if ((ctxt->input->cur[2] == 'x') &&
9656 (ctxt->input->cur[3] == 'm') &&
9657 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009658 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009659 ret += 5;
9660#ifdef DEBUG_PUSH
9661 xmlGenericError(xmlGenericErrorContext,
9662 "PP: Parsing XML Decl\n");
9663#endif
9664 xmlParseXMLDecl(ctxt);
9665 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9666 /*
9667 * The XML REC instructs us to stop parsing right
9668 * here
9669 */
9670 ctxt->instate = XML_PARSER_EOF;
9671 return(0);
9672 }
9673 ctxt->standalone = ctxt->input->standalone;
9674 if ((ctxt->encoding == NULL) &&
9675 (ctxt->input->encoding != NULL))
9676 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9677 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9678 (!ctxt->disableSAX))
9679 ctxt->sax->startDocument(ctxt->userData);
9680 ctxt->instate = XML_PARSER_MISC;
9681#ifdef DEBUG_PUSH
9682 xmlGenericError(xmlGenericErrorContext,
9683 "PP: entering MISC\n");
9684#endif
9685 } else {
9686 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9687 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9688 (!ctxt->disableSAX))
9689 ctxt->sax->startDocument(ctxt->userData);
9690 ctxt->instate = XML_PARSER_MISC;
9691#ifdef DEBUG_PUSH
9692 xmlGenericError(xmlGenericErrorContext,
9693 "PP: entering MISC\n");
9694#endif
9695 }
9696 } else {
9697 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9698 ctxt->sax->setDocumentLocator(ctxt->userData,
9699 &xmlDefaultSAXLocator);
9700 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009701 if (ctxt->version == NULL) {
9702 xmlErrMemory(ctxt, NULL);
9703 break;
9704 }
Owen Taylor3473f882001-02-23 17:55:21 +00009705 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9706 (!ctxt->disableSAX))
9707 ctxt->sax->startDocument(ctxt->userData);
9708 ctxt->instate = XML_PARSER_MISC;
9709#ifdef DEBUG_PUSH
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: entering MISC\n");
9712#endif
9713 }
9714 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009715 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009716 const xmlChar *name;
9717 const xmlChar *prefix;
9718 const xmlChar *URI;
9719 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009720
9721 if ((avail < 2) && (ctxt->inputNr == 1))
9722 goto done;
9723 cur = ctxt->input->cur[0];
9724 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009725 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009726 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009727 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9728 ctxt->sax->endDocument(ctxt->userData);
9729 goto done;
9730 }
9731 if (!terminate) {
9732 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009733 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009734 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009735 goto done;
9736 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9737 goto done;
9738 }
9739 }
9740 if (ctxt->spaceNr == 0)
9741 spacePush(ctxt, -1);
9742 else
9743 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009744#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009745 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009746#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009747 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009748#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009749 else
9750 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009751#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009752 if (name == NULL) {
9753 spacePop(ctxt);
9754 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009755 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9756 ctxt->sax->endDocument(ctxt->userData);
9757 goto done;
9758 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009759#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009760 /*
9761 * [ VC: Root Element Type ]
9762 * The Name in the document type declaration must match
9763 * the element type of the root element.
9764 */
9765 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9766 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9767 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009768#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009769
9770 /*
9771 * Check for an Empty Element.
9772 */
9773 if ((RAW == '/') && (NXT(1) == '>')) {
9774 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009775
9776 if (ctxt->sax2) {
9777 if ((ctxt->sax != NULL) &&
9778 (ctxt->sax->endElementNs != NULL) &&
9779 (!ctxt->disableSAX))
9780 ctxt->sax->endElementNs(ctxt->userData, name,
9781 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009782 if (ctxt->nsNr - nsNr > 0)
9783 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009784#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009785 } else {
9786 if ((ctxt->sax != NULL) &&
9787 (ctxt->sax->endElement != NULL) &&
9788 (!ctxt->disableSAX))
9789 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009790#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009791 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009792 spacePop(ctxt);
9793 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009794 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009795 } else {
9796 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009797 }
9798 break;
9799 }
9800 if (RAW == '>') {
9801 NEXT;
9802 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009803 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009804 "Couldn't find end of Start Tag %s\n",
9805 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009806 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009807 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009808 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009809 if (ctxt->sax2)
9810 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009811#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009812 else
9813 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009814#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009815
Daniel Veillarda880b122003-04-21 21:36:41 +00009816 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009817 break;
9818 }
9819 case XML_PARSER_CONTENT: {
9820 const xmlChar *test;
9821 unsigned int cons;
9822 if ((avail < 2) && (ctxt->inputNr == 1))
9823 goto done;
9824 cur = ctxt->input->cur[0];
9825 next = ctxt->input->cur[1];
9826
9827 test = CUR_PTR;
9828 cons = ctxt->input->consumed;
9829 if ((cur == '<') && (next == '/')) {
9830 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009831 break;
9832 } else if ((cur == '<') && (next == '?')) {
9833 if ((!terminate) &&
9834 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9835 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009836 xmlParsePI(ctxt);
9837 } else if ((cur == '<') && (next != '!')) {
9838 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009839 break;
9840 } else if ((cur == '<') && (next == '!') &&
9841 (ctxt->input->cur[2] == '-') &&
9842 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009843 int term;
9844
9845 if (avail < 4)
9846 goto done;
9847 ctxt->input->cur += 4;
9848 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9849 ctxt->input->cur -= 4;
9850 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009851 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009852 xmlParseComment(ctxt);
9853 ctxt->instate = XML_PARSER_CONTENT;
9854 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9855 (ctxt->input->cur[2] == '[') &&
9856 (ctxt->input->cur[3] == 'C') &&
9857 (ctxt->input->cur[4] == 'D') &&
9858 (ctxt->input->cur[5] == 'A') &&
9859 (ctxt->input->cur[6] == 'T') &&
9860 (ctxt->input->cur[7] == 'A') &&
9861 (ctxt->input->cur[8] == '[')) {
9862 SKIP(9);
9863 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009864 break;
9865 } else if ((cur == '<') && (next == '!') &&
9866 (avail < 9)) {
9867 goto done;
9868 } else if (cur == '&') {
9869 if ((!terminate) &&
9870 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9871 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009872 xmlParseReference(ctxt);
9873 } else {
9874 /* TODO Avoid the extra copy, handle directly !!! */
9875 /*
9876 * Goal of the following test is:
9877 * - minimize calls to the SAX 'character' callback
9878 * when they are mergeable
9879 * - handle a problem for isBlank when we only parse
9880 * a sequence of blank chars and the next one is
9881 * not available to check against '<' presence.
9882 * - tries to homogenize the differences in SAX
9883 * callbacks between the push and pull versions
9884 * of the parser.
9885 */
9886 if ((ctxt->inputNr == 1) &&
9887 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9888 if (!terminate) {
9889 if (ctxt->progressive) {
9890 if ((lastlt == NULL) ||
9891 (ctxt->input->cur > lastlt))
9892 goto done;
9893 } else if (xmlParseLookupSequence(ctxt,
9894 '<', 0, 0) < 0) {
9895 goto done;
9896 }
9897 }
9898 }
9899 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009900 xmlParseCharData(ctxt, 0);
9901 }
9902 /*
9903 * Pop-up of finished entities.
9904 */
9905 while ((RAW == 0) && (ctxt->inputNr > 1))
9906 xmlPopInput(ctxt);
9907 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009908 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9909 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009910 ctxt->instate = XML_PARSER_EOF;
9911 break;
9912 }
9913 break;
9914 }
9915 case XML_PARSER_END_TAG:
9916 if (avail < 2)
9917 goto done;
9918 if (!terminate) {
9919 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009920 /* > can be found unescaped in attribute values */
9921 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009922 goto done;
9923 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9924 goto done;
9925 }
9926 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009927 if (ctxt->sax2) {
9928 xmlParseEndTag2(ctxt,
9929 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9930 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009931 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009932 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009933 }
9934#ifdef LIBXML_SAX1_ENABLED
9935 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009936 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009937#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009938 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009939 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009940 } else {
9941 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009942 }
9943 break;
9944 case XML_PARSER_CDATA_SECTION: {
9945 /*
9946 * The Push mode needs to have the SAX callback for
9947 * cdataBlock merge back contiguous callbacks.
9948 */
9949 int base;
9950
9951 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9952 if (base < 0) {
9953 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009954 int tmp;
9955
9956 tmp = xmlCheckCdataPush(ctxt->input->cur,
9957 XML_PARSER_BIG_BUFFER_SIZE);
9958 if (tmp < 0) {
9959 tmp = -tmp;
9960 ctxt->input->cur += tmp;
9961 goto encoding_error;
9962 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9964 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009965 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009966 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009967 else if (ctxt->sax->characters != NULL)
9968 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009969 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009970 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009971 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009972 ctxt->checkIndex = 0;
9973 }
9974 goto done;
9975 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009976 int tmp;
9977
9978 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9979 if ((tmp < 0) || (tmp != base)) {
9980 tmp = -tmp;
9981 ctxt->input->cur += tmp;
9982 goto encoding_error;
9983 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009984 if ((ctxt->sax != NULL) && (base > 0) &&
9985 (!ctxt->disableSAX)) {
9986 if (ctxt->sax->cdataBlock != NULL)
9987 ctxt->sax->cdataBlock(ctxt->userData,
9988 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009989 else if (ctxt->sax->characters != NULL)
9990 ctxt->sax->characters(ctxt->userData,
9991 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009992 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009993 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009994 ctxt->checkIndex = 0;
9995 ctxt->instate = XML_PARSER_CONTENT;
9996#ifdef DEBUG_PUSH
9997 xmlGenericError(xmlGenericErrorContext,
9998 "PP: entering CONTENT\n");
9999#endif
10000 }
10001 break;
10002 }
Owen Taylor3473f882001-02-23 17:55:21 +000010003 case XML_PARSER_MISC:
10004 SKIP_BLANKS;
10005 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010006 avail = ctxt->input->length -
10007 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010008 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010009 avail = ctxt->input->buf->buffer->use -
10010 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010011 if (avail < 2)
10012 goto done;
10013 cur = ctxt->input->cur[0];
10014 next = ctxt->input->cur[1];
10015 if ((cur == '<') && (next == '?')) {
10016 if ((!terminate) &&
10017 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10018 goto done;
10019#ifdef DEBUG_PUSH
10020 xmlGenericError(xmlGenericErrorContext,
10021 "PP: Parsing PI\n");
10022#endif
10023 xmlParsePI(ctxt);
10024 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010025 (ctxt->input->cur[2] == '-') &&
10026 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010027 if ((!terminate) &&
10028 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10029 goto done;
10030#ifdef DEBUG_PUSH
10031 xmlGenericError(xmlGenericErrorContext,
10032 "PP: Parsing Comment\n");
10033#endif
10034 xmlParseComment(ctxt);
10035 ctxt->instate = XML_PARSER_MISC;
10036 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010037 (ctxt->input->cur[2] == 'D') &&
10038 (ctxt->input->cur[3] == 'O') &&
10039 (ctxt->input->cur[4] == 'C') &&
10040 (ctxt->input->cur[5] == 'T') &&
10041 (ctxt->input->cur[6] == 'Y') &&
10042 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010043 (ctxt->input->cur[8] == 'E')) {
10044 if ((!terminate) &&
10045 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10046 goto done;
10047#ifdef DEBUG_PUSH
10048 xmlGenericError(xmlGenericErrorContext,
10049 "PP: Parsing internal subset\n");
10050#endif
10051 ctxt->inSubset = 1;
10052 xmlParseDocTypeDecl(ctxt);
10053 if (RAW == '[') {
10054 ctxt->instate = XML_PARSER_DTD;
10055#ifdef DEBUG_PUSH
10056 xmlGenericError(xmlGenericErrorContext,
10057 "PP: entering DTD\n");
10058#endif
10059 } else {
10060 /*
10061 * Create and update the external subset.
10062 */
10063 ctxt->inSubset = 2;
10064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10065 (ctxt->sax->externalSubset != NULL))
10066 ctxt->sax->externalSubset(ctxt->userData,
10067 ctxt->intSubName, ctxt->extSubSystem,
10068 ctxt->extSubURI);
10069 ctxt->inSubset = 0;
10070 ctxt->instate = XML_PARSER_PROLOG;
10071#ifdef DEBUG_PUSH
10072 xmlGenericError(xmlGenericErrorContext,
10073 "PP: entering PROLOG\n");
10074#endif
10075 }
10076 } else if ((cur == '<') && (next == '!') &&
10077 (avail < 9)) {
10078 goto done;
10079 } else {
10080 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010081 ctxt->progressive = 1;
10082 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010083#ifdef DEBUG_PUSH
10084 xmlGenericError(xmlGenericErrorContext,
10085 "PP: entering START_TAG\n");
10086#endif
10087 }
10088 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010089 case XML_PARSER_PROLOG:
10090 SKIP_BLANKS;
10091 if (ctxt->input->buf == NULL)
10092 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10093 else
10094 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10095 if (avail < 2)
10096 goto done;
10097 cur = ctxt->input->cur[0];
10098 next = ctxt->input->cur[1];
10099 if ((cur == '<') && (next == '?')) {
10100 if ((!terminate) &&
10101 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10102 goto done;
10103#ifdef DEBUG_PUSH
10104 xmlGenericError(xmlGenericErrorContext,
10105 "PP: Parsing PI\n");
10106#endif
10107 xmlParsePI(ctxt);
10108 } else if ((cur == '<') && (next == '!') &&
10109 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10110 if ((!terminate) &&
10111 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10112 goto done;
10113#ifdef DEBUG_PUSH
10114 xmlGenericError(xmlGenericErrorContext,
10115 "PP: Parsing Comment\n");
10116#endif
10117 xmlParseComment(ctxt);
10118 ctxt->instate = XML_PARSER_PROLOG;
10119 } else if ((cur == '<') && (next == '!') &&
10120 (avail < 4)) {
10121 goto done;
10122 } else {
10123 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010124 if (ctxt->progressive == 0)
10125 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010126 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010127#ifdef DEBUG_PUSH
10128 xmlGenericError(xmlGenericErrorContext,
10129 "PP: entering START_TAG\n");
10130#endif
10131 }
10132 break;
10133 case XML_PARSER_EPILOG:
10134 SKIP_BLANKS;
10135 if (ctxt->input->buf == NULL)
10136 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10137 else
10138 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10139 if (avail < 2)
10140 goto done;
10141 cur = ctxt->input->cur[0];
10142 next = ctxt->input->cur[1];
10143 if ((cur == '<') && (next == '?')) {
10144 if ((!terminate) &&
10145 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10146 goto done;
10147#ifdef DEBUG_PUSH
10148 xmlGenericError(xmlGenericErrorContext,
10149 "PP: Parsing PI\n");
10150#endif
10151 xmlParsePI(ctxt);
10152 ctxt->instate = XML_PARSER_EPILOG;
10153 } else if ((cur == '<') && (next == '!') &&
10154 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10155 if ((!terminate) &&
10156 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10157 goto done;
10158#ifdef DEBUG_PUSH
10159 xmlGenericError(xmlGenericErrorContext,
10160 "PP: Parsing Comment\n");
10161#endif
10162 xmlParseComment(ctxt);
10163 ctxt->instate = XML_PARSER_EPILOG;
10164 } else if ((cur == '<') && (next == '!') &&
10165 (avail < 4)) {
10166 goto done;
10167 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010168 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010169 ctxt->instate = XML_PARSER_EOF;
10170#ifdef DEBUG_PUSH
10171 xmlGenericError(xmlGenericErrorContext,
10172 "PP: entering EOF\n");
10173#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010174 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010175 ctxt->sax->endDocument(ctxt->userData);
10176 goto done;
10177 }
10178 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010179 case XML_PARSER_DTD: {
10180 /*
10181 * Sorry but progressive parsing of the internal subset
10182 * is not expected to be supported. We first check that
10183 * the full content of the internal subset is available and
10184 * the parsing is launched only at that point.
10185 * Internal subset ends up with "']' S? '>'" in an unescaped
10186 * section and not in a ']]>' sequence which are conditional
10187 * sections (whoever argued to keep that crap in XML deserve
10188 * a place in hell !).
10189 */
10190 int base, i;
10191 xmlChar *buf;
10192 xmlChar quote = 0;
10193
10194 base = ctxt->input->cur - ctxt->input->base;
10195 if (base < 0) return(0);
10196 if (ctxt->checkIndex > base)
10197 base = ctxt->checkIndex;
10198 buf = ctxt->input->buf->buffer->content;
10199 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10200 base++) {
10201 if (quote != 0) {
10202 if (buf[base] == quote)
10203 quote = 0;
10204 continue;
10205 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010206 if ((quote == 0) && (buf[base] == '<')) {
10207 int found = 0;
10208 /* special handling of comments */
10209 if (((unsigned int) base + 4 <
10210 ctxt->input->buf->buffer->use) &&
10211 (buf[base + 1] == '!') &&
10212 (buf[base + 2] == '-') &&
10213 (buf[base + 3] == '-')) {
10214 for (;(unsigned int) base + 3 <
10215 ctxt->input->buf->buffer->use; base++) {
10216 if ((buf[base] == '-') &&
10217 (buf[base + 1] == '-') &&
10218 (buf[base + 2] == '>')) {
10219 found = 1;
10220 base += 2;
10221 break;
10222 }
10223 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010224 if (!found) {
10225#if 0
10226 fprintf(stderr, "unfinished comment\n");
10227#endif
10228 break; /* for */
10229 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010230 continue;
10231 }
10232 }
Owen Taylor3473f882001-02-23 17:55:21 +000010233 if (buf[base] == '"') {
10234 quote = '"';
10235 continue;
10236 }
10237 if (buf[base] == '\'') {
10238 quote = '\'';
10239 continue;
10240 }
10241 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010242#if 0
10243 fprintf(stderr, "%c%c%c%c: ", buf[base],
10244 buf[base + 1], buf[base + 2], buf[base + 3]);
10245#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010246 if ((unsigned int) base +1 >=
10247 ctxt->input->buf->buffer->use)
10248 break;
10249 if (buf[base + 1] == ']') {
10250 /* conditional crap, skip both ']' ! */
10251 base++;
10252 continue;
10253 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010254 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010255 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10256 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010257 if (buf[base + i] == '>') {
10258#if 0
10259 fprintf(stderr, "found\n");
10260#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010261 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010262 }
10263 if (!IS_BLANK_CH(buf[base + i])) {
10264#if 0
10265 fprintf(stderr, "not found\n");
10266#endif
10267 goto not_end_of_int_subset;
10268 }
Owen Taylor3473f882001-02-23 17:55:21 +000010269 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010270#if 0
10271 fprintf(stderr, "end of stream\n");
10272#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010273 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010274
Owen Taylor3473f882001-02-23 17:55:21 +000010275 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010276not_end_of_int_subset:
10277 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010278 }
10279 /*
10280 * We didn't find the end of the Internal subset
10281 */
Owen Taylor3473f882001-02-23 17:55:21 +000010282#ifdef DEBUG_PUSH
10283 if (next == 0)
10284 xmlGenericError(xmlGenericErrorContext,
10285 "PP: lookup of int subset end filed\n");
10286#endif
10287 goto done;
10288
10289found_end_int_subset:
10290 xmlParseInternalSubset(ctxt);
10291 ctxt->inSubset = 2;
10292 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10293 (ctxt->sax->externalSubset != NULL))
10294 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10295 ctxt->extSubSystem, ctxt->extSubURI);
10296 ctxt->inSubset = 0;
10297 ctxt->instate = XML_PARSER_PROLOG;
10298 ctxt->checkIndex = 0;
10299#ifdef DEBUG_PUSH
10300 xmlGenericError(xmlGenericErrorContext,
10301 "PP: entering PROLOG\n");
10302#endif
10303 break;
10304 }
10305 case XML_PARSER_COMMENT:
10306 xmlGenericError(xmlGenericErrorContext,
10307 "PP: internal error, state == COMMENT\n");
10308 ctxt->instate = XML_PARSER_CONTENT;
10309#ifdef DEBUG_PUSH
10310 xmlGenericError(xmlGenericErrorContext,
10311 "PP: entering CONTENT\n");
10312#endif
10313 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010314 case XML_PARSER_IGNORE:
10315 xmlGenericError(xmlGenericErrorContext,
10316 "PP: internal error, state == IGNORE");
10317 ctxt->instate = XML_PARSER_DTD;
10318#ifdef DEBUG_PUSH
10319 xmlGenericError(xmlGenericErrorContext,
10320 "PP: entering DTD\n");
10321#endif
10322 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010323 case XML_PARSER_PI:
10324 xmlGenericError(xmlGenericErrorContext,
10325 "PP: internal error, state == PI\n");
10326 ctxt->instate = XML_PARSER_CONTENT;
10327#ifdef DEBUG_PUSH
10328 xmlGenericError(xmlGenericErrorContext,
10329 "PP: entering CONTENT\n");
10330#endif
10331 break;
10332 case XML_PARSER_ENTITY_DECL:
10333 xmlGenericError(xmlGenericErrorContext,
10334 "PP: internal error, state == ENTITY_DECL\n");
10335 ctxt->instate = XML_PARSER_DTD;
10336#ifdef DEBUG_PUSH
10337 xmlGenericError(xmlGenericErrorContext,
10338 "PP: entering DTD\n");
10339#endif
10340 break;
10341 case XML_PARSER_ENTITY_VALUE:
10342 xmlGenericError(xmlGenericErrorContext,
10343 "PP: internal error, state == ENTITY_VALUE\n");
10344 ctxt->instate = XML_PARSER_CONTENT;
10345#ifdef DEBUG_PUSH
10346 xmlGenericError(xmlGenericErrorContext,
10347 "PP: entering DTD\n");
10348#endif
10349 break;
10350 case XML_PARSER_ATTRIBUTE_VALUE:
10351 xmlGenericError(xmlGenericErrorContext,
10352 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10353 ctxt->instate = XML_PARSER_START_TAG;
10354#ifdef DEBUG_PUSH
10355 xmlGenericError(xmlGenericErrorContext,
10356 "PP: entering START_TAG\n");
10357#endif
10358 break;
10359 case XML_PARSER_SYSTEM_LITERAL:
10360 xmlGenericError(xmlGenericErrorContext,
10361 "PP: internal error, state == SYSTEM_LITERAL\n");
10362 ctxt->instate = XML_PARSER_START_TAG;
10363#ifdef DEBUG_PUSH
10364 xmlGenericError(xmlGenericErrorContext,
10365 "PP: entering START_TAG\n");
10366#endif
10367 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010368 case XML_PARSER_PUBLIC_LITERAL:
10369 xmlGenericError(xmlGenericErrorContext,
10370 "PP: internal error, state == PUBLIC_LITERAL\n");
10371 ctxt->instate = XML_PARSER_START_TAG;
10372#ifdef DEBUG_PUSH
10373 xmlGenericError(xmlGenericErrorContext,
10374 "PP: entering START_TAG\n");
10375#endif
10376 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
10378 }
10379done:
10380#ifdef DEBUG_PUSH
10381 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10382#endif
10383 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010384encoding_error:
10385 {
10386 char buffer[150];
10387
10388 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10389 ctxt->input->cur[0], ctxt->input->cur[1],
10390 ctxt->input->cur[2], ctxt->input->cur[3]);
10391 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10392 "Input is not proper UTF-8, indicate encoding !\n%s",
10393 BAD_CAST buffer, NULL);
10394 }
10395 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010396}
10397
10398/**
Owen Taylor3473f882001-02-23 17:55:21 +000010399 * xmlParseChunk:
10400 * @ctxt: an XML parser context
10401 * @chunk: a char array
10402 * @size: the size in bytes of the chunk
10403 * @terminate: last chunk indicator
10404 *
10405 * Parse a Chunk of memory
10406 *
10407 * Returns zero if no error, the xmlParserErrors otherwise.
10408 */
10409int
10410xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10411 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010412 int end_in_lf = 0;
10413
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010414 if (ctxt == NULL)
10415 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010416 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010417 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010418 if (ctxt->instate == XML_PARSER_START)
10419 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010420 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10421 (chunk[size - 1] == '\r')) {
10422 end_in_lf = 1;
10423 size--;
10424 }
Owen Taylor3473f882001-02-23 17:55:21 +000010425 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10426 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10427 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10428 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010429 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010430
William M. Bracka3215c72004-07-31 16:24:01 +000010431 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10432 if (res < 0) {
10433 ctxt->errNo = XML_PARSER_EOF;
10434 ctxt->disableSAX = 1;
10435 return (XML_PARSER_EOF);
10436 }
Owen Taylor3473f882001-02-23 17:55:21 +000010437 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10438 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010439 ctxt->input->end =
10440 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010441#ifdef DEBUG_PUSH
10442 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10443#endif
10444
Owen Taylor3473f882001-02-23 17:55:21 +000010445 } else if (ctxt->instate != XML_PARSER_EOF) {
10446 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10447 xmlParserInputBufferPtr in = ctxt->input->buf;
10448 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10449 (in->raw != NULL)) {
10450 int nbchars;
10451
10452 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10453 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010454 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010455 xmlGenericError(xmlGenericErrorContext,
10456 "xmlParseChunk: encoder error\n");
10457 return(XML_ERR_INVALID_ENCODING);
10458 }
10459 }
10460 }
10461 }
10462 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010463 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10464 (ctxt->input->buf != NULL)) {
10465 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10466 }
Daniel Veillard14412512005-01-21 23:53:26 +000010467 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010468 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010469 if (terminate) {
10470 /*
10471 * Check for termination
10472 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010473 int avail = 0;
10474
10475 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010476 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010477 avail = ctxt->input->length -
10478 (ctxt->input->cur - ctxt->input->base);
10479 else
10480 avail = ctxt->input->buf->buffer->use -
10481 (ctxt->input->cur - ctxt->input->base);
10482 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010483
Owen Taylor3473f882001-02-23 17:55:21 +000010484 if ((ctxt->instate != XML_PARSER_EOF) &&
10485 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010486 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010487 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010488 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010489 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010490 }
Owen Taylor3473f882001-02-23 17:55:21 +000010491 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010492 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010493 ctxt->sax->endDocument(ctxt->userData);
10494 }
10495 ctxt->instate = XML_PARSER_EOF;
10496 }
10497 return((xmlParserErrors) ctxt->errNo);
10498}
10499
10500/************************************************************************
10501 * *
10502 * I/O front end functions to the parser *
10503 * *
10504 ************************************************************************/
10505
10506/**
Owen Taylor3473f882001-02-23 17:55:21 +000010507 * xmlCreatePushParserCtxt:
10508 * @sax: a SAX handler
10509 * @user_data: The user data returned on SAX callbacks
10510 * @chunk: a pointer to an array of chars
10511 * @size: number of chars in the array
10512 * @filename: an optional file name or URI
10513 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010514 * Create a parser context for using the XML parser in push mode.
10515 * If @buffer and @size are non-NULL, the data is used to detect
10516 * the encoding. The remaining characters will be parsed so they
10517 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010518 * To allow content encoding detection, @size should be >= 4
10519 * The value of @filename is used for fetching external entities
10520 * and error/warning reports.
10521 *
10522 * Returns the new parser context or NULL
10523 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010524
Owen Taylor3473f882001-02-23 17:55:21 +000010525xmlParserCtxtPtr
10526xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10527 const char *chunk, int size, const char *filename) {
10528 xmlParserCtxtPtr ctxt;
10529 xmlParserInputPtr inputStream;
10530 xmlParserInputBufferPtr buf;
10531 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10532
10533 /*
10534 * plug some encoding conversion routines
10535 */
10536 if ((chunk != NULL) && (size >= 4))
10537 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10538
10539 buf = xmlAllocParserInputBuffer(enc);
10540 if (buf == NULL) return(NULL);
10541
10542 ctxt = xmlNewParserCtxt();
10543 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010544 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010545 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010546 return(NULL);
10547 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010548 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010549 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10550 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010551 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010552 xmlFreeParserInputBuffer(buf);
10553 xmlFreeParserCtxt(ctxt);
10554 return(NULL);
10555 }
Owen Taylor3473f882001-02-23 17:55:21 +000010556 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010557#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010558 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010559#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010560 xmlFree(ctxt->sax);
10561 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10562 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010563 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010564 xmlFreeParserInputBuffer(buf);
10565 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010566 return(NULL);
10567 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010568 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10569 if (sax->initialized == XML_SAX2_MAGIC)
10570 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10571 else
10572 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010573 if (user_data != NULL)
10574 ctxt->userData = user_data;
10575 }
10576 if (filename == NULL) {
10577 ctxt->directory = NULL;
10578 } else {
10579 ctxt->directory = xmlParserGetDirectory(filename);
10580 }
10581
10582 inputStream = xmlNewInputStream(ctxt);
10583 if (inputStream == NULL) {
10584 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010585 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010586 return(NULL);
10587 }
10588
10589 if (filename == NULL)
10590 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010591 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010592 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010593 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010594 if (inputStream->filename == NULL) {
10595 xmlFreeParserCtxt(ctxt);
10596 xmlFreeParserInputBuffer(buf);
10597 return(NULL);
10598 }
10599 }
Owen Taylor3473f882001-02-23 17:55:21 +000010600 inputStream->buf = buf;
10601 inputStream->base = inputStream->buf->buffer->content;
10602 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010603 inputStream->end =
10604 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010605
10606 inputPush(ctxt, inputStream);
10607
William M. Brack3a1cd212005-02-11 14:35:54 +000010608 /*
10609 * If the caller didn't provide an initial 'chunk' for determining
10610 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10611 * that it can be automatically determined later
10612 */
10613 if ((size == 0) || (chunk == NULL)) {
10614 ctxt->charset = XML_CHAR_ENCODING_NONE;
10615 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010616 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10617 int cur = ctxt->input->cur - ctxt->input->base;
10618
Owen Taylor3473f882001-02-23 17:55:21 +000010619 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010620
10621 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10622 ctxt->input->cur = ctxt->input->base + cur;
10623 ctxt->input->end =
10624 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010625#ifdef DEBUG_PUSH
10626 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10627#endif
10628 }
10629
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010630 if (enc != XML_CHAR_ENCODING_NONE) {
10631 xmlSwitchEncoding(ctxt, enc);
10632 }
10633
Owen Taylor3473f882001-02-23 17:55:21 +000010634 return(ctxt);
10635}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010636#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010637
10638/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010639 * xmlStopParser:
10640 * @ctxt: an XML parser context
10641 *
10642 * Blocks further parser processing
10643 */
10644void
10645xmlStopParser(xmlParserCtxtPtr ctxt) {
10646 if (ctxt == NULL)
10647 return;
10648 ctxt->instate = XML_PARSER_EOF;
10649 ctxt->disableSAX = 1;
10650 if (ctxt->input != NULL) {
10651 ctxt->input->cur = BAD_CAST"";
10652 ctxt->input->base = ctxt->input->cur;
10653 }
10654}
10655
10656/**
Owen Taylor3473f882001-02-23 17:55:21 +000010657 * xmlCreateIOParserCtxt:
10658 * @sax: a SAX handler
10659 * @user_data: The user data returned on SAX callbacks
10660 * @ioread: an I/O read function
10661 * @ioclose: an I/O close function
10662 * @ioctx: an I/O handler
10663 * @enc: the charset encoding if known
10664 *
10665 * Create a parser context for using the XML parser with an existing
10666 * I/O stream
10667 *
10668 * Returns the new parser context or NULL
10669 */
10670xmlParserCtxtPtr
10671xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10672 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10673 void *ioctx, xmlCharEncoding enc) {
10674 xmlParserCtxtPtr ctxt;
10675 xmlParserInputPtr inputStream;
10676 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010677
10678 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010679
10680 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10681 if (buf == NULL) return(NULL);
10682
10683 ctxt = xmlNewParserCtxt();
10684 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010685 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010686 return(NULL);
10687 }
10688 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010689#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010690 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010691#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010692 xmlFree(ctxt->sax);
10693 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10694 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010695 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010696 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010697 return(NULL);
10698 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010699 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10700 if (sax->initialized == XML_SAX2_MAGIC)
10701 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10702 else
10703 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010704 if (user_data != NULL)
10705 ctxt->userData = user_data;
10706 }
10707
10708 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10709 if (inputStream == NULL) {
10710 xmlFreeParserCtxt(ctxt);
10711 return(NULL);
10712 }
10713 inputPush(ctxt, inputStream);
10714
10715 return(ctxt);
10716}
10717
Daniel Veillard4432df22003-09-28 18:58:27 +000010718#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010719/************************************************************************
10720 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010721 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010722 * *
10723 ************************************************************************/
10724
10725/**
10726 * xmlIOParseDTD:
10727 * @sax: the SAX handler block or NULL
10728 * @input: an Input Buffer
10729 * @enc: the charset encoding if known
10730 *
10731 * Load and parse a DTD
10732 *
10733 * Returns the resulting xmlDtdPtr or NULL in case of error.
10734 * @input will be freed at parsing end.
10735 */
10736
10737xmlDtdPtr
10738xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10739 xmlCharEncoding enc) {
10740 xmlDtdPtr ret = NULL;
10741 xmlParserCtxtPtr ctxt;
10742 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010743 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010744
10745 if (input == NULL)
10746 return(NULL);
10747
10748 ctxt = xmlNewParserCtxt();
10749 if (ctxt == NULL) {
10750 return(NULL);
10751 }
10752
10753 /*
10754 * Set-up the SAX context
10755 */
10756 if (sax != NULL) {
10757 if (ctxt->sax != NULL)
10758 xmlFree(ctxt->sax);
10759 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010760 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010761 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010762 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010763
10764 /*
10765 * generate a parser input from the I/O handler
10766 */
10767
Daniel Veillard43caefb2003-12-07 19:32:22 +000010768 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010769 if (pinput == NULL) {
10770 if (sax != NULL) ctxt->sax = NULL;
10771 xmlFreeParserCtxt(ctxt);
10772 return(NULL);
10773 }
10774
10775 /*
10776 * plug some encoding conversion routines here.
10777 */
10778 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010779 if (enc != XML_CHAR_ENCODING_NONE) {
10780 xmlSwitchEncoding(ctxt, enc);
10781 }
Owen Taylor3473f882001-02-23 17:55:21 +000010782
10783 pinput->filename = NULL;
10784 pinput->line = 1;
10785 pinput->col = 1;
10786 pinput->base = ctxt->input->cur;
10787 pinput->cur = ctxt->input->cur;
10788 pinput->free = NULL;
10789
10790 /*
10791 * let's parse that entity knowing it's an external subset.
10792 */
10793 ctxt->inSubset = 2;
10794 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10795 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10796 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010797
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010798 if ((enc == XML_CHAR_ENCODING_NONE) &&
10799 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010800 /*
10801 * Get the 4 first bytes and decode the charset
10802 * if enc != XML_CHAR_ENCODING_NONE
10803 * plug some encoding conversion routines.
10804 */
10805 start[0] = RAW;
10806 start[1] = NXT(1);
10807 start[2] = NXT(2);
10808 start[3] = NXT(3);
10809 enc = xmlDetectCharEncoding(start, 4);
10810 if (enc != XML_CHAR_ENCODING_NONE) {
10811 xmlSwitchEncoding(ctxt, enc);
10812 }
10813 }
10814
Owen Taylor3473f882001-02-23 17:55:21 +000010815 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10816
10817 if (ctxt->myDoc != NULL) {
10818 if (ctxt->wellFormed) {
10819 ret = ctxt->myDoc->extSubset;
10820 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010821 if (ret != NULL) {
10822 xmlNodePtr tmp;
10823
10824 ret->doc = NULL;
10825 tmp = ret->children;
10826 while (tmp != NULL) {
10827 tmp->doc = NULL;
10828 tmp = tmp->next;
10829 }
10830 }
Owen Taylor3473f882001-02-23 17:55:21 +000010831 } else {
10832 ret = NULL;
10833 }
10834 xmlFreeDoc(ctxt->myDoc);
10835 ctxt->myDoc = NULL;
10836 }
10837 if (sax != NULL) ctxt->sax = NULL;
10838 xmlFreeParserCtxt(ctxt);
10839
10840 return(ret);
10841}
10842
10843/**
10844 * xmlSAXParseDTD:
10845 * @sax: the SAX handler block
10846 * @ExternalID: a NAME* containing the External ID of the DTD
10847 * @SystemID: a NAME* containing the URL to the DTD
10848 *
10849 * Load and parse an external subset.
10850 *
10851 * Returns the resulting xmlDtdPtr or NULL in case of error.
10852 */
10853
10854xmlDtdPtr
10855xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10856 const xmlChar *SystemID) {
10857 xmlDtdPtr ret = NULL;
10858 xmlParserCtxtPtr ctxt;
10859 xmlParserInputPtr input = NULL;
10860 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010861 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010862
10863 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10864
10865 ctxt = xmlNewParserCtxt();
10866 if (ctxt == NULL) {
10867 return(NULL);
10868 }
10869
10870 /*
10871 * Set-up the SAX context
10872 */
10873 if (sax != NULL) {
10874 if (ctxt->sax != NULL)
10875 xmlFree(ctxt->sax);
10876 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010877 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010878 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010879
10880 /*
10881 * Canonicalise the system ID
10882 */
10883 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010884 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010885 xmlFreeParserCtxt(ctxt);
10886 return(NULL);
10887 }
Owen Taylor3473f882001-02-23 17:55:21 +000010888
10889 /*
10890 * Ask the Entity resolver to load the damn thing
10891 */
10892
10893 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010894 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010895 if (input == NULL) {
10896 if (sax != NULL) ctxt->sax = NULL;
10897 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010898 if (systemIdCanonic != NULL)
10899 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010900 return(NULL);
10901 }
10902
10903 /*
10904 * plug some encoding conversion routines here.
10905 */
10906 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010907 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10908 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10909 xmlSwitchEncoding(ctxt, enc);
10910 }
Owen Taylor3473f882001-02-23 17:55:21 +000010911
10912 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010913 input->filename = (char *) systemIdCanonic;
10914 else
10915 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010916 input->line = 1;
10917 input->col = 1;
10918 input->base = ctxt->input->cur;
10919 input->cur = ctxt->input->cur;
10920 input->free = NULL;
10921
10922 /*
10923 * let's parse that entity knowing it's an external subset.
10924 */
10925 ctxt->inSubset = 2;
10926 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10927 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10928 ExternalID, SystemID);
10929 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10930
10931 if (ctxt->myDoc != NULL) {
10932 if (ctxt->wellFormed) {
10933 ret = ctxt->myDoc->extSubset;
10934 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010935 if (ret != NULL) {
10936 xmlNodePtr tmp;
10937
10938 ret->doc = NULL;
10939 tmp = ret->children;
10940 while (tmp != NULL) {
10941 tmp->doc = NULL;
10942 tmp = tmp->next;
10943 }
10944 }
Owen Taylor3473f882001-02-23 17:55:21 +000010945 } else {
10946 ret = NULL;
10947 }
10948 xmlFreeDoc(ctxt->myDoc);
10949 ctxt->myDoc = NULL;
10950 }
10951 if (sax != NULL) ctxt->sax = NULL;
10952 xmlFreeParserCtxt(ctxt);
10953
10954 return(ret);
10955}
10956
Daniel Veillard4432df22003-09-28 18:58:27 +000010957
Owen Taylor3473f882001-02-23 17:55:21 +000010958/**
10959 * xmlParseDTD:
10960 * @ExternalID: a NAME* containing the External ID of the DTD
10961 * @SystemID: a NAME* containing the URL to the DTD
10962 *
10963 * Load and parse an external subset.
10964 *
10965 * Returns the resulting xmlDtdPtr or NULL in case of error.
10966 */
10967
10968xmlDtdPtr
10969xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10970 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10971}
Daniel Veillard4432df22003-09-28 18:58:27 +000010972#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010973
10974/************************************************************************
10975 * *
10976 * Front ends when parsing an Entity *
10977 * *
10978 ************************************************************************/
10979
10980/**
Owen Taylor3473f882001-02-23 17:55:21 +000010981 * xmlParseCtxtExternalEntity:
10982 * @ctx: the existing parsing context
10983 * @URL: the URL for the entity to load
10984 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010985 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010986 *
10987 * Parse an external general entity within an existing parsing context
10988 * An external general parsed entity is well-formed if it matches the
10989 * production labeled extParsedEnt.
10990 *
10991 * [78] extParsedEnt ::= TextDecl? content
10992 *
10993 * Returns 0 if the entity is well formed, -1 in case of args problem and
10994 * the parser error code otherwise
10995 */
10996
10997int
10998xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010999 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011000 xmlParserCtxtPtr ctxt;
11001 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011002 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011003 xmlSAXHandlerPtr oldsax = NULL;
11004 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011005 xmlChar start[4];
11006 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011007
Daniel Veillardce682bc2004-11-05 17:22:25 +000011008 if (ctx == NULL) return(-1);
11009
Owen Taylor3473f882001-02-23 17:55:21 +000011010 if (ctx->depth > 40) {
11011 return(XML_ERR_ENTITY_LOOP);
11012 }
11013
Daniel Veillardcda96922001-08-21 10:56:31 +000011014 if (lst != NULL)
11015 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011016 if ((URL == NULL) && (ID == NULL))
11017 return(-1);
11018 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11019 return(-1);
11020
11021
11022 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11023 if (ctxt == NULL) return(-1);
11024 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011025 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011026 oldsax = ctxt->sax;
11027 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011028 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011029 newDoc = xmlNewDoc(BAD_CAST "1.0");
11030 if (newDoc == NULL) {
11031 xmlFreeParserCtxt(ctxt);
11032 return(-1);
11033 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011034 if (ctx->myDoc->dict) {
11035 newDoc->dict = ctx->myDoc->dict;
11036 xmlDictReference(newDoc->dict);
11037 }
Owen Taylor3473f882001-02-23 17:55:21 +000011038 if (ctx->myDoc != NULL) {
11039 newDoc->intSubset = ctx->myDoc->intSubset;
11040 newDoc->extSubset = ctx->myDoc->extSubset;
11041 }
11042 if (ctx->myDoc->URL != NULL) {
11043 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11044 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011045 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11046 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011047 ctxt->sax = oldsax;
11048 xmlFreeParserCtxt(ctxt);
11049 newDoc->intSubset = NULL;
11050 newDoc->extSubset = NULL;
11051 xmlFreeDoc(newDoc);
11052 return(-1);
11053 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011054 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011055 nodePush(ctxt, newDoc->children);
11056 if (ctx->myDoc == NULL) {
11057 ctxt->myDoc = newDoc;
11058 } else {
11059 ctxt->myDoc = ctx->myDoc;
11060 newDoc->children->doc = ctx->myDoc;
11061 }
11062
Daniel Veillard87a764e2001-06-20 17:41:10 +000011063 /*
11064 * Get the 4 first bytes and decode the charset
11065 * if enc != XML_CHAR_ENCODING_NONE
11066 * plug some encoding conversion routines.
11067 */
11068 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011069 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11070 start[0] = RAW;
11071 start[1] = NXT(1);
11072 start[2] = NXT(2);
11073 start[3] = NXT(3);
11074 enc = xmlDetectCharEncoding(start, 4);
11075 if (enc != XML_CHAR_ENCODING_NONE) {
11076 xmlSwitchEncoding(ctxt, enc);
11077 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011078 }
11079
Owen Taylor3473f882001-02-23 17:55:21 +000011080 /*
11081 * Parse a possible text declaration first
11082 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011083 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011084 xmlParseTextDecl(ctxt);
11085 }
11086
11087 /*
11088 * Doing validity checking on chunk doesn't make sense
11089 */
11090 ctxt->instate = XML_PARSER_CONTENT;
11091 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011092 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011093 ctxt->loadsubset = ctx->loadsubset;
11094 ctxt->depth = ctx->depth + 1;
11095 ctxt->replaceEntities = ctx->replaceEntities;
11096 if (ctxt->validate) {
11097 ctxt->vctxt.error = ctx->vctxt.error;
11098 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011099 } else {
11100 ctxt->vctxt.error = NULL;
11101 ctxt->vctxt.warning = NULL;
11102 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011103 ctxt->vctxt.nodeTab = NULL;
11104 ctxt->vctxt.nodeNr = 0;
11105 ctxt->vctxt.nodeMax = 0;
11106 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011107 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11108 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011109 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11110 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11111 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011112 ctxt->dictNames = ctx->dictNames;
11113 ctxt->attsDefault = ctx->attsDefault;
11114 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011115 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011116
11117 xmlParseContent(ctxt);
11118
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011119 ctx->validate = ctxt->validate;
11120 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011121 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011122 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011123 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011124 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011125 }
11126 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011127 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011128 }
11129
11130 if (!ctxt->wellFormed) {
11131 if (ctxt->errNo == 0)
11132 ret = 1;
11133 else
11134 ret = ctxt->errNo;
11135 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011136 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011137 xmlNodePtr cur;
11138
11139 /*
11140 * Return the newly created nodeset after unlinking it from
11141 * they pseudo parent.
11142 */
11143 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011144 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011145 while (cur != NULL) {
11146 cur->parent = NULL;
11147 cur = cur->next;
11148 }
11149 newDoc->children->children = NULL;
11150 }
11151 ret = 0;
11152 }
11153 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011154 ctxt->dict = NULL;
11155 ctxt->attsDefault = NULL;
11156 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011157 xmlFreeParserCtxt(ctxt);
11158 newDoc->intSubset = NULL;
11159 newDoc->extSubset = NULL;
11160 xmlFreeDoc(newDoc);
11161
11162 return(ret);
11163}
11164
11165/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011166 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011167 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011168 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011169 * @sax: the SAX handler bloc (possibly NULL)
11170 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11171 * @depth: Used for loop detection, use 0
11172 * @URL: the URL for the entity to load
11173 * @ID: the System ID for the entity to load
11174 * @list: the return value for the set of parsed nodes
11175 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011176 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011177 *
11178 * Returns 0 if the entity is well formed, -1 in case of args problem and
11179 * the parser error code otherwise
11180 */
11181
Daniel Veillard7d515752003-09-26 19:12:37 +000011182static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011183xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11184 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011185 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011186 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011187 xmlParserCtxtPtr ctxt;
11188 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011189 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011190 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011191 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011192 xmlChar start[4];
11193 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011194
11195 if (depth > 40) {
11196 return(XML_ERR_ENTITY_LOOP);
11197 }
11198
11199
11200
11201 if (list != NULL)
11202 *list = NULL;
11203 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011204 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011205 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011206 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011207
11208
11209 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011210 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011211 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011212 if (oldctxt != NULL) {
11213 ctxt->_private = oldctxt->_private;
11214 ctxt->loadsubset = oldctxt->loadsubset;
11215 ctxt->validate = oldctxt->validate;
11216 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011217 ctxt->record_info = oldctxt->record_info;
11218 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11219 ctxt->node_seq.length = oldctxt->node_seq.length;
11220 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011221 } else {
11222 /*
11223 * Doing validity checking on chunk without context
11224 * doesn't make sense
11225 */
11226 ctxt->_private = NULL;
11227 ctxt->validate = 0;
11228 ctxt->external = 2;
11229 ctxt->loadsubset = 0;
11230 }
Owen Taylor3473f882001-02-23 17:55:21 +000011231 if (sax != NULL) {
11232 oldsax = ctxt->sax;
11233 ctxt->sax = sax;
11234 if (user_data != NULL)
11235 ctxt->userData = user_data;
11236 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011237 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011238 newDoc = xmlNewDoc(BAD_CAST "1.0");
11239 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011240 ctxt->node_seq.maximum = 0;
11241 ctxt->node_seq.length = 0;
11242 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011243 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011244 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011245 }
11246 if (doc != NULL) {
11247 newDoc->intSubset = doc->intSubset;
11248 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011249 newDoc->dict = doc->dict;
11250 } else if (oldctxt != NULL) {
11251 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011252 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011253 xmlDictReference(newDoc->dict);
11254
Owen Taylor3473f882001-02-23 17:55:21 +000011255 if (doc->URL != NULL) {
11256 newDoc->URL = xmlStrdup(doc->URL);
11257 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011258 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11259 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011260 if (sax != NULL)
11261 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011262 ctxt->node_seq.maximum = 0;
11263 ctxt->node_seq.length = 0;
11264 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011265 xmlFreeParserCtxt(ctxt);
11266 newDoc->intSubset = NULL;
11267 newDoc->extSubset = NULL;
11268 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011269 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011270 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011271 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011272 nodePush(ctxt, newDoc->children);
11273 if (doc == NULL) {
11274 ctxt->myDoc = newDoc;
11275 } else {
11276 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011277 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011278 }
11279
Daniel Veillard87a764e2001-06-20 17:41:10 +000011280 /*
11281 * Get the 4 first bytes and decode the charset
11282 * if enc != XML_CHAR_ENCODING_NONE
11283 * plug some encoding conversion routines.
11284 */
11285 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011286 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11287 start[0] = RAW;
11288 start[1] = NXT(1);
11289 start[2] = NXT(2);
11290 start[3] = NXT(3);
11291 enc = xmlDetectCharEncoding(start, 4);
11292 if (enc != XML_CHAR_ENCODING_NONE) {
11293 xmlSwitchEncoding(ctxt, enc);
11294 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011295 }
11296
Owen Taylor3473f882001-02-23 17:55:21 +000011297 /*
11298 * Parse a possible text declaration first
11299 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011300 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011301 xmlParseTextDecl(ctxt);
11302 }
11303
Owen Taylor3473f882001-02-23 17:55:21 +000011304 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011305 ctxt->depth = depth;
11306
11307 xmlParseContent(ctxt);
11308
Daniel Veillard561b7f82002-03-20 21:55:57 +000011309 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011310 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011311 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011312 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011313 }
11314 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011315 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011316 }
11317
11318 if (!ctxt->wellFormed) {
11319 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011320 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011321 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011322 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011323 } else {
11324 if (list != NULL) {
11325 xmlNodePtr cur;
11326
11327 /*
11328 * Return the newly created nodeset after unlinking it from
11329 * they pseudo parent.
11330 */
11331 cur = newDoc->children->children;
11332 *list = cur;
11333 while (cur != NULL) {
11334 cur->parent = NULL;
11335 cur = cur->next;
11336 }
11337 newDoc->children->children = NULL;
11338 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011339 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011340 }
11341 if (sax != NULL)
11342 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011343 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11344 oldctxt->node_seq.length = ctxt->node_seq.length;
11345 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011346 ctxt->node_seq.maximum = 0;
11347 ctxt->node_seq.length = 0;
11348 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011349 xmlFreeParserCtxt(ctxt);
11350 newDoc->intSubset = NULL;
11351 newDoc->extSubset = NULL;
11352 xmlFreeDoc(newDoc);
11353
11354 return(ret);
11355}
11356
Daniel Veillard81273902003-09-30 00:43:48 +000011357#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011358/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011359 * xmlParseExternalEntity:
11360 * @doc: the document the chunk pertains to
11361 * @sax: the SAX handler bloc (possibly NULL)
11362 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11363 * @depth: Used for loop detection, use 0
11364 * @URL: the URL for the entity to load
11365 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011366 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011367 *
11368 * Parse an external general entity
11369 * An external general parsed entity is well-formed if it matches the
11370 * production labeled extParsedEnt.
11371 *
11372 * [78] extParsedEnt ::= TextDecl? content
11373 *
11374 * Returns 0 if the entity is well formed, -1 in case of args problem and
11375 * the parser error code otherwise
11376 */
11377
11378int
11379xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011380 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011381 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011382 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011383}
11384
11385/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011386 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011387 * @doc: the document the chunk pertains to
11388 * @sax: the SAX handler bloc (possibly NULL)
11389 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11390 * @depth: Used for loop detection, use 0
11391 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011392 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011393 *
11394 * Parse a well-balanced chunk of an XML document
11395 * called by the parser
11396 * The allowed sequence for the Well Balanced Chunk is the one defined by
11397 * the content production in the XML grammar:
11398 *
11399 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11400 *
11401 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11402 * the parser error code otherwise
11403 */
11404
11405int
11406xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011407 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011408 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11409 depth, string, lst, 0 );
11410}
Daniel Veillard81273902003-09-30 00:43:48 +000011411#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011412
11413/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011414 * xmlParseBalancedChunkMemoryInternal:
11415 * @oldctxt: the existing parsing context
11416 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11417 * @user_data: the user data field for the parser context
11418 * @lst: the return value for the set of parsed nodes
11419 *
11420 *
11421 * Parse a well-balanced chunk of an XML document
11422 * called by the parser
11423 * The allowed sequence for the Well Balanced Chunk is the one defined by
11424 * the content production in the XML grammar:
11425 *
11426 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11427 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011428 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11429 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011430 *
11431 * In case recover is set to 1, the nodelist will not be empty even if
11432 * the parsed chunk is not well balanced.
11433 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011434static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011435xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11436 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11437 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011438 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011439 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011440 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011441 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011442 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011443 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011444 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011445
11446 if (oldctxt->depth > 40) {
11447 return(XML_ERR_ENTITY_LOOP);
11448 }
11449
11450
11451 if (lst != NULL)
11452 *lst = NULL;
11453 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011454 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011455
11456 size = xmlStrlen(string);
11457
11458 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011459 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011460 if (user_data != NULL)
11461 ctxt->userData = user_data;
11462 else
11463 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011464 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11465 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011466 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11467 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11468 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011469
11470 oldsax = ctxt->sax;
11471 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011472 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011473 ctxt->replaceEntities = oldctxt->replaceEntities;
11474 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011475
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011476 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011477 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011478 newDoc = xmlNewDoc(BAD_CAST "1.0");
11479 if (newDoc == NULL) {
11480 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011481 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011482 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011483 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011484 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011485 newDoc->dict = ctxt->dict;
11486 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011487 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011488 } else {
11489 ctxt->myDoc = oldctxt->myDoc;
11490 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011491 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011492 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011493 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11494 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011495 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011496 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011497 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011498 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011499 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011500 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011501 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011502 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011503 ctxt->myDoc->children = NULL;
11504 ctxt->myDoc->last = NULL;
11505 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011506 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011507 ctxt->instate = XML_PARSER_CONTENT;
11508 ctxt->depth = oldctxt->depth + 1;
11509
Daniel Veillard328f48c2002-11-15 15:24:34 +000011510 ctxt->validate = 0;
11511 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011512 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11513 /*
11514 * ID/IDREF registration will be done in xmlValidateElement below
11515 */
11516 ctxt->loadsubset |= XML_SKIP_IDS;
11517 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011518 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011519 ctxt->attsDefault = oldctxt->attsDefault;
11520 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011521
Daniel Veillard68e9e742002-11-16 15:35:11 +000011522 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011523 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011524 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011525 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011526 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011527 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011528 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011529 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011530 }
11531
11532 if (!ctxt->wellFormed) {
11533 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011534 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011535 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011536 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011537 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011538 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011539 }
11540
William M. Brack7b9154b2003-09-27 19:23:50 +000011541 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011542 xmlNodePtr cur;
11543
11544 /*
11545 * Return the newly created nodeset after unlinking it from
11546 * they pseudo parent.
11547 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011548 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011549 *lst = cur;
11550 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011551#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011552 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11553 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11554 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011555 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11556 oldctxt->myDoc, cur);
11557 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011558#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011559 cur->parent = NULL;
11560 cur = cur->next;
11561 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011562 ctxt->myDoc->children->children = NULL;
11563 }
11564 if (ctxt->myDoc != NULL) {
11565 xmlFreeNode(ctxt->myDoc->children);
11566 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011567 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011568 }
11569
11570 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011571 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011572 ctxt->attsDefault = NULL;
11573 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011574 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011575 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011576 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011577 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011578
11579 return(ret);
11580}
11581
Daniel Veillard29b17482004-08-16 00:39:03 +000011582/**
11583 * xmlParseInNodeContext:
11584 * @node: the context node
11585 * @data: the input string
11586 * @datalen: the input string length in bytes
11587 * @options: a combination of xmlParserOption
11588 * @lst: the return value for the set of parsed nodes
11589 *
11590 * Parse a well-balanced chunk of an XML document
11591 * within the context (DTD, namespaces, etc ...) of the given node.
11592 *
11593 * The allowed sequence for the data is a Well Balanced Chunk defined by
11594 * the content production in the XML grammar:
11595 *
11596 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11597 *
11598 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11599 * error code otherwise
11600 */
11601xmlParserErrors
11602xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11603 int options, xmlNodePtr *lst) {
11604#ifdef SAX2
11605 xmlParserCtxtPtr ctxt;
11606 xmlDocPtr doc = NULL;
11607 xmlNodePtr fake, cur;
11608 int nsnr = 0;
11609
11610 xmlParserErrors ret = XML_ERR_OK;
11611
11612 /*
11613 * check all input parameters, grab the document
11614 */
11615 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11616 return(XML_ERR_INTERNAL_ERROR);
11617 switch (node->type) {
11618 case XML_ELEMENT_NODE:
11619 case XML_ATTRIBUTE_NODE:
11620 case XML_TEXT_NODE:
11621 case XML_CDATA_SECTION_NODE:
11622 case XML_ENTITY_REF_NODE:
11623 case XML_PI_NODE:
11624 case XML_COMMENT_NODE:
11625 case XML_DOCUMENT_NODE:
11626 case XML_HTML_DOCUMENT_NODE:
11627 break;
11628 default:
11629 return(XML_ERR_INTERNAL_ERROR);
11630
11631 }
11632 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11633 (node->type != XML_DOCUMENT_NODE) &&
11634 (node->type != XML_HTML_DOCUMENT_NODE))
11635 node = node->parent;
11636 if (node == NULL)
11637 return(XML_ERR_INTERNAL_ERROR);
11638 if (node->type == XML_ELEMENT_NODE)
11639 doc = node->doc;
11640 else
11641 doc = (xmlDocPtr) node;
11642 if (doc == NULL)
11643 return(XML_ERR_INTERNAL_ERROR);
11644
11645 /*
11646 * allocate a context and set-up everything not related to the
11647 * node position in the tree
11648 */
11649 if (doc->type == XML_DOCUMENT_NODE)
11650 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11651#ifdef LIBXML_HTML_ENABLED
11652 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11653 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11654#endif
11655 else
11656 return(XML_ERR_INTERNAL_ERROR);
11657
11658 if (ctxt == NULL)
11659 return(XML_ERR_NO_MEMORY);
11660 fake = xmlNewComment(NULL);
11661 if (fake == NULL) {
11662 xmlFreeParserCtxt(ctxt);
11663 return(XML_ERR_NO_MEMORY);
11664 }
11665 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011666
11667 /*
11668 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11669 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11670 * we must wait until the last moment to free the original one.
11671 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011672 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011673 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011674 xmlDictFree(ctxt->dict);
11675 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011676 } else
11677 options |= XML_PARSE_NODICT;
11678
11679 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011680 xmlDetectSAX2(ctxt);
11681 ctxt->myDoc = doc;
11682
11683 if (node->type == XML_ELEMENT_NODE) {
11684 nodePush(ctxt, node);
11685 /*
11686 * initialize the SAX2 namespaces stack
11687 */
11688 cur = node;
11689 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11690 xmlNsPtr ns = cur->nsDef;
11691 const xmlChar *iprefix, *ihref;
11692
11693 while (ns != NULL) {
11694 if (ctxt->dict) {
11695 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11696 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11697 } else {
11698 iprefix = ns->prefix;
11699 ihref = ns->href;
11700 }
11701
11702 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11703 nsPush(ctxt, iprefix, ihref);
11704 nsnr++;
11705 }
11706 ns = ns->next;
11707 }
11708 cur = cur->parent;
11709 }
11710 ctxt->instate = XML_PARSER_CONTENT;
11711 }
11712
11713 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11714 /*
11715 * ID/IDREF registration will be done in xmlValidateElement below
11716 */
11717 ctxt->loadsubset |= XML_SKIP_IDS;
11718 }
11719
Daniel Veillard499cc922006-01-18 17:22:35 +000011720#ifdef LIBXML_HTML_ENABLED
11721 if (doc->type == XML_HTML_DOCUMENT_NODE)
11722 __htmlParseContent(ctxt);
11723 else
11724#endif
11725 xmlParseContent(ctxt);
11726
Daniel Veillard29b17482004-08-16 00:39:03 +000011727 nsPop(ctxt, nsnr);
11728 if ((RAW == '<') && (NXT(1) == '/')) {
11729 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11730 } else if (RAW != 0) {
11731 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11732 }
11733 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11734 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11735 ctxt->wellFormed = 0;
11736 }
11737
11738 if (!ctxt->wellFormed) {
11739 if (ctxt->errNo == 0)
11740 ret = XML_ERR_INTERNAL_ERROR;
11741 else
11742 ret = (xmlParserErrors)ctxt->errNo;
11743 } else {
11744 ret = XML_ERR_OK;
11745 }
11746
11747 /*
11748 * Return the newly created nodeset after unlinking it from
11749 * the pseudo sibling.
11750 */
11751
11752 cur = fake->next;
11753 fake->next = NULL;
11754 node->last = fake;
11755
11756 if (cur != NULL) {
11757 cur->prev = NULL;
11758 }
11759
11760 *lst = cur;
11761
11762 while (cur != NULL) {
11763 cur->parent = NULL;
11764 cur = cur->next;
11765 }
11766
11767 xmlUnlinkNode(fake);
11768 xmlFreeNode(fake);
11769
11770
11771 if (ret != XML_ERR_OK) {
11772 xmlFreeNodeList(*lst);
11773 *lst = NULL;
11774 }
William M. Brackc3f81342004-10-03 01:22:44 +000011775
William M. Brackb7b54de2004-10-06 16:38:01 +000011776 if (doc->dict != NULL)
11777 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011778 xmlFreeParserCtxt(ctxt);
11779
11780 return(ret);
11781#else /* !SAX2 */
11782 return(XML_ERR_INTERNAL_ERROR);
11783#endif
11784}
11785
Daniel Veillard81273902003-09-30 00:43:48 +000011786#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011787/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011788 * xmlParseBalancedChunkMemoryRecover:
11789 * @doc: the document the chunk pertains to
11790 * @sax: the SAX handler bloc (possibly NULL)
11791 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11792 * @depth: Used for loop detection, use 0
11793 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11794 * @lst: the return value for the set of parsed nodes
11795 * @recover: return nodes even if the data is broken (use 0)
11796 *
11797 *
11798 * Parse a well-balanced chunk of an XML document
11799 * called by the parser
11800 * The allowed sequence for the Well Balanced Chunk is the one defined by
11801 * the content production in the XML grammar:
11802 *
11803 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11804 *
11805 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11806 * the parser error code otherwise
11807 *
11808 * In case recover is set to 1, the nodelist will not be empty even if
11809 * the parsed chunk is not well balanced.
11810 */
11811int
11812xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11813 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11814 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011815 xmlParserCtxtPtr ctxt;
11816 xmlDocPtr newDoc;
11817 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011818 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011819 int size;
11820 int ret = 0;
11821
11822 if (depth > 40) {
11823 return(XML_ERR_ENTITY_LOOP);
11824 }
11825
11826
Daniel Veillardcda96922001-08-21 10:56:31 +000011827 if (lst != NULL)
11828 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011829 if (string == NULL)
11830 return(-1);
11831
11832 size = xmlStrlen(string);
11833
11834 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11835 if (ctxt == NULL) return(-1);
11836 ctxt->userData = ctxt;
11837 if (sax != NULL) {
11838 oldsax = ctxt->sax;
11839 ctxt->sax = sax;
11840 if (user_data != NULL)
11841 ctxt->userData = user_data;
11842 }
11843 newDoc = xmlNewDoc(BAD_CAST "1.0");
11844 if (newDoc == NULL) {
11845 xmlFreeParserCtxt(ctxt);
11846 return(-1);
11847 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011848 if ((doc != NULL) && (doc->dict != NULL)) {
11849 xmlDictFree(ctxt->dict);
11850 ctxt->dict = doc->dict;
11851 xmlDictReference(ctxt->dict);
11852 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11853 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11854 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11855 ctxt->dictNames = 1;
11856 } else {
11857 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11858 }
Owen Taylor3473f882001-02-23 17:55:21 +000011859 if (doc != NULL) {
11860 newDoc->intSubset = doc->intSubset;
11861 newDoc->extSubset = doc->extSubset;
11862 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011863 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11864 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011865 if (sax != NULL)
11866 ctxt->sax = oldsax;
11867 xmlFreeParserCtxt(ctxt);
11868 newDoc->intSubset = NULL;
11869 newDoc->extSubset = NULL;
11870 xmlFreeDoc(newDoc);
11871 return(-1);
11872 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011873 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11874 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011875 if (doc == NULL) {
11876 ctxt->myDoc = newDoc;
11877 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011878 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011879 newDoc->children->doc = doc;
11880 }
11881 ctxt->instate = XML_PARSER_CONTENT;
11882 ctxt->depth = depth;
11883
11884 /*
11885 * Doing validity checking on chunk doesn't make sense
11886 */
11887 ctxt->validate = 0;
11888 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011889 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011890
Daniel Veillardb39bc392002-10-26 19:29:51 +000011891 if ( doc != NULL ){
11892 content = doc->children;
11893 doc->children = NULL;
11894 xmlParseContent(ctxt);
11895 doc->children = content;
11896 }
11897 else {
11898 xmlParseContent(ctxt);
11899 }
Owen Taylor3473f882001-02-23 17:55:21 +000011900 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011901 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011902 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011903 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011904 }
11905 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011906 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011907 }
11908
11909 if (!ctxt->wellFormed) {
11910 if (ctxt->errNo == 0)
11911 ret = 1;
11912 else
11913 ret = ctxt->errNo;
11914 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011915 ret = 0;
11916 }
11917
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011918 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11919 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011920
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011921 /*
11922 * Return the newly created nodeset after unlinking it from
11923 * they pseudo parent.
11924 */
11925 cur = newDoc->children->children;
11926 *lst = cur;
11927 while (cur != NULL) {
11928 xmlSetTreeDoc(cur, doc);
11929 cur->parent = NULL;
11930 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011931 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011932 newDoc->children->children = NULL;
11933 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011934
Owen Taylor3473f882001-02-23 17:55:21 +000011935 if (sax != NULL)
11936 ctxt->sax = oldsax;
11937 xmlFreeParserCtxt(ctxt);
11938 newDoc->intSubset = NULL;
11939 newDoc->extSubset = NULL;
11940 xmlFreeDoc(newDoc);
11941
11942 return(ret);
11943}
11944
11945/**
11946 * xmlSAXParseEntity:
11947 * @sax: the SAX handler block
11948 * @filename: the filename
11949 *
11950 * parse an XML external entity out of context and build a tree.
11951 * It use the given SAX function block to handle the parsing callback.
11952 * If sax is NULL, fallback to the default DOM tree building routines.
11953 *
11954 * [78] extParsedEnt ::= TextDecl? content
11955 *
11956 * This correspond to a "Well Balanced" chunk
11957 *
11958 * Returns the resulting document tree
11959 */
11960
11961xmlDocPtr
11962xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11963 xmlDocPtr ret;
11964 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011965
11966 ctxt = xmlCreateFileParserCtxt(filename);
11967 if (ctxt == NULL) {
11968 return(NULL);
11969 }
11970 if (sax != NULL) {
11971 if (ctxt->sax != NULL)
11972 xmlFree(ctxt->sax);
11973 ctxt->sax = sax;
11974 ctxt->userData = NULL;
11975 }
11976
Owen Taylor3473f882001-02-23 17:55:21 +000011977 xmlParseExtParsedEnt(ctxt);
11978
11979 if (ctxt->wellFormed)
11980 ret = ctxt->myDoc;
11981 else {
11982 ret = NULL;
11983 xmlFreeDoc(ctxt->myDoc);
11984 ctxt->myDoc = NULL;
11985 }
11986 if (sax != NULL)
11987 ctxt->sax = NULL;
11988 xmlFreeParserCtxt(ctxt);
11989
11990 return(ret);
11991}
11992
11993/**
11994 * xmlParseEntity:
11995 * @filename: the filename
11996 *
11997 * parse an XML external entity out of context and build a tree.
11998 *
11999 * [78] extParsedEnt ::= TextDecl? content
12000 *
12001 * This correspond to a "Well Balanced" chunk
12002 *
12003 * Returns the resulting document tree
12004 */
12005
12006xmlDocPtr
12007xmlParseEntity(const char *filename) {
12008 return(xmlSAXParseEntity(NULL, filename));
12009}
Daniel Veillard81273902003-09-30 00:43:48 +000012010#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012011
12012/**
12013 * xmlCreateEntityParserCtxt:
12014 * @URL: the entity URL
12015 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012016 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012017 *
12018 * Create a parser context for an external entity
12019 * Automatic support for ZLIB/Compress compressed document is provided
12020 * by default if found at compile-time.
12021 *
12022 * Returns the new parser context or NULL
12023 */
12024xmlParserCtxtPtr
12025xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12026 const xmlChar *base) {
12027 xmlParserCtxtPtr ctxt;
12028 xmlParserInputPtr inputStream;
12029 char *directory = NULL;
12030 xmlChar *uri;
12031
12032 ctxt = xmlNewParserCtxt();
12033 if (ctxt == NULL) {
12034 return(NULL);
12035 }
12036
12037 uri = xmlBuildURI(URL, base);
12038
12039 if (uri == NULL) {
12040 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12041 if (inputStream == NULL) {
12042 xmlFreeParserCtxt(ctxt);
12043 return(NULL);
12044 }
12045
12046 inputPush(ctxt, inputStream);
12047
12048 if ((ctxt->directory == NULL) && (directory == NULL))
12049 directory = xmlParserGetDirectory((char *)URL);
12050 if ((ctxt->directory == NULL) && (directory != NULL))
12051 ctxt->directory = directory;
12052 } else {
12053 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12054 if (inputStream == NULL) {
12055 xmlFree(uri);
12056 xmlFreeParserCtxt(ctxt);
12057 return(NULL);
12058 }
12059
12060 inputPush(ctxt, inputStream);
12061
12062 if ((ctxt->directory == NULL) && (directory == NULL))
12063 directory = xmlParserGetDirectory((char *)uri);
12064 if ((ctxt->directory == NULL) && (directory != NULL))
12065 ctxt->directory = directory;
12066 xmlFree(uri);
12067 }
Owen Taylor3473f882001-02-23 17:55:21 +000012068 return(ctxt);
12069}
12070
12071/************************************************************************
12072 * *
12073 * Front ends when parsing from a file *
12074 * *
12075 ************************************************************************/
12076
12077/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012078 * xmlCreateURLParserCtxt:
12079 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012080 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012081 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012082 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012083 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012084 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012085 *
12086 * Returns the new parser context or NULL
12087 */
12088xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012089xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012090{
12091 xmlParserCtxtPtr ctxt;
12092 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012093 char *directory = NULL;
12094
Owen Taylor3473f882001-02-23 17:55:21 +000012095 ctxt = xmlNewParserCtxt();
12096 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012097 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012098 return(NULL);
12099 }
12100
Daniel Veillarddf292f72005-01-16 19:00:15 +000012101 if (options)
12102 xmlCtxtUseOptions(ctxt, options);
12103 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012104
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012105 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012106 if (inputStream == NULL) {
12107 xmlFreeParserCtxt(ctxt);
12108 return(NULL);
12109 }
12110
Owen Taylor3473f882001-02-23 17:55:21 +000012111 inputPush(ctxt, inputStream);
12112 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012113 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012114 if ((ctxt->directory == NULL) && (directory != NULL))
12115 ctxt->directory = directory;
12116
12117 return(ctxt);
12118}
12119
Daniel Veillard61b93382003-11-03 14:28:31 +000012120/**
12121 * xmlCreateFileParserCtxt:
12122 * @filename: the filename
12123 *
12124 * Create a parser context for a file content.
12125 * Automatic support for ZLIB/Compress compressed document is provided
12126 * by default if found at compile-time.
12127 *
12128 * Returns the new parser context or NULL
12129 */
12130xmlParserCtxtPtr
12131xmlCreateFileParserCtxt(const char *filename)
12132{
12133 return(xmlCreateURLParserCtxt(filename, 0));
12134}
12135
Daniel Veillard81273902003-09-30 00:43:48 +000012136#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012137/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012138 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012139 * @sax: the SAX handler block
12140 * @filename: the filename
12141 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12142 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012143 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012144 *
12145 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12146 * compressed document is provided by default if found at compile-time.
12147 * It use the given SAX function block to handle the parsing callback.
12148 * If sax is NULL, fallback to the default DOM tree building routines.
12149 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012150 * User data (void *) is stored within the parser context in the
12151 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012152 *
Owen Taylor3473f882001-02-23 17:55:21 +000012153 * Returns the resulting document tree
12154 */
12155
12156xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012157xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12158 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012159 xmlDocPtr ret;
12160 xmlParserCtxtPtr ctxt;
12161 char *directory = NULL;
12162
Daniel Veillard635ef722001-10-29 11:48:19 +000012163 xmlInitParser();
12164
Owen Taylor3473f882001-02-23 17:55:21 +000012165 ctxt = xmlCreateFileParserCtxt(filename);
12166 if (ctxt == NULL) {
12167 return(NULL);
12168 }
12169 if (sax != NULL) {
12170 if (ctxt->sax != NULL)
12171 xmlFree(ctxt->sax);
12172 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012173 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012174 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012175 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012176 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012177 }
Owen Taylor3473f882001-02-23 17:55:21 +000012178
12179 if ((ctxt->directory == NULL) && (directory == NULL))
12180 directory = xmlParserGetDirectory(filename);
12181 if ((ctxt->directory == NULL) && (directory != NULL))
12182 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12183
Daniel Veillarddad3f682002-11-17 16:47:27 +000012184 ctxt->recovery = recovery;
12185
Owen Taylor3473f882001-02-23 17:55:21 +000012186 xmlParseDocument(ctxt);
12187
William M. Brackc07329e2003-09-08 01:57:30 +000012188 if ((ctxt->wellFormed) || recovery) {
12189 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012190 if (ret != NULL) {
12191 if (ctxt->input->buf->compressed > 0)
12192 ret->compression = 9;
12193 else
12194 ret->compression = ctxt->input->buf->compressed;
12195 }
William M. Brackc07329e2003-09-08 01:57:30 +000012196 }
Owen Taylor3473f882001-02-23 17:55:21 +000012197 else {
12198 ret = NULL;
12199 xmlFreeDoc(ctxt->myDoc);
12200 ctxt->myDoc = NULL;
12201 }
12202 if (sax != NULL)
12203 ctxt->sax = NULL;
12204 xmlFreeParserCtxt(ctxt);
12205
12206 return(ret);
12207}
12208
12209/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012210 * xmlSAXParseFile:
12211 * @sax: the SAX handler block
12212 * @filename: the filename
12213 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12214 * documents
12215 *
12216 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12217 * compressed document is provided by default if found at compile-time.
12218 * It use the given SAX function block to handle the parsing callback.
12219 * If sax is NULL, fallback to the default DOM tree building routines.
12220 *
12221 * Returns the resulting document tree
12222 */
12223
12224xmlDocPtr
12225xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12226 int recovery) {
12227 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12228}
12229
12230/**
Owen Taylor3473f882001-02-23 17:55:21 +000012231 * xmlRecoverDoc:
12232 * @cur: a pointer to an array of xmlChar
12233 *
12234 * parse an XML in-memory document and build a tree.
12235 * In the case the document is not Well Formed, a tree is built anyway
12236 *
12237 * Returns the resulting document tree
12238 */
12239
12240xmlDocPtr
12241xmlRecoverDoc(xmlChar *cur) {
12242 return(xmlSAXParseDoc(NULL, cur, 1));
12243}
12244
12245/**
12246 * xmlParseFile:
12247 * @filename: the filename
12248 *
12249 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12250 * compressed document is provided by default if found at compile-time.
12251 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012252 * Returns the resulting document tree if the file was wellformed,
12253 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012254 */
12255
12256xmlDocPtr
12257xmlParseFile(const char *filename) {
12258 return(xmlSAXParseFile(NULL, filename, 0));
12259}
12260
12261/**
12262 * xmlRecoverFile:
12263 * @filename: the filename
12264 *
12265 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12266 * compressed document is provided by default if found at compile-time.
12267 * In the case the document is not Well Formed, a tree is built anyway
12268 *
12269 * Returns the resulting document tree
12270 */
12271
12272xmlDocPtr
12273xmlRecoverFile(const char *filename) {
12274 return(xmlSAXParseFile(NULL, filename, 1));
12275}
12276
12277
12278/**
12279 * xmlSetupParserForBuffer:
12280 * @ctxt: an XML parser context
12281 * @buffer: a xmlChar * buffer
12282 * @filename: a file name
12283 *
12284 * Setup the parser context to parse a new buffer; Clears any prior
12285 * contents from the parser context. The buffer parameter must not be
12286 * NULL, but the filename parameter can be
12287 */
12288void
12289xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12290 const char* filename)
12291{
12292 xmlParserInputPtr input;
12293
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012294 if ((ctxt == NULL) || (buffer == NULL))
12295 return;
12296
Owen Taylor3473f882001-02-23 17:55:21 +000012297 input = xmlNewInputStream(ctxt);
12298 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012299 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012300 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012301 return;
12302 }
12303
12304 xmlClearParserCtxt(ctxt);
12305 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012306 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012307 input->base = buffer;
12308 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012309 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012310 inputPush(ctxt, input);
12311}
12312
12313/**
12314 * xmlSAXUserParseFile:
12315 * @sax: a SAX handler
12316 * @user_data: The user data returned on SAX callbacks
12317 * @filename: a file name
12318 *
12319 * parse an XML file and call the given SAX handler routines.
12320 * Automatic support for ZLIB/Compress compressed document is provided
12321 *
12322 * Returns 0 in case of success or a error number otherwise
12323 */
12324int
12325xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12326 const char *filename) {
12327 int ret = 0;
12328 xmlParserCtxtPtr ctxt;
12329
12330 ctxt = xmlCreateFileParserCtxt(filename);
12331 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012332#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012333 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012334#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012335 xmlFree(ctxt->sax);
12336 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012337 xmlDetectSAX2(ctxt);
12338
Owen Taylor3473f882001-02-23 17:55:21 +000012339 if (user_data != NULL)
12340 ctxt->userData = user_data;
12341
12342 xmlParseDocument(ctxt);
12343
12344 if (ctxt->wellFormed)
12345 ret = 0;
12346 else {
12347 if (ctxt->errNo != 0)
12348 ret = ctxt->errNo;
12349 else
12350 ret = -1;
12351 }
12352 if (sax != NULL)
12353 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012354 if (ctxt->myDoc != NULL) {
12355 xmlFreeDoc(ctxt->myDoc);
12356 ctxt->myDoc = NULL;
12357 }
Owen Taylor3473f882001-02-23 17:55:21 +000012358 xmlFreeParserCtxt(ctxt);
12359
12360 return ret;
12361}
Daniel Veillard81273902003-09-30 00:43:48 +000012362#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012363
12364/************************************************************************
12365 * *
12366 * Front ends when parsing from memory *
12367 * *
12368 ************************************************************************/
12369
12370/**
12371 * xmlCreateMemoryParserCtxt:
12372 * @buffer: a pointer to a char array
12373 * @size: the size of the array
12374 *
12375 * Create a parser context for an XML in-memory document.
12376 *
12377 * Returns the new parser context or NULL
12378 */
12379xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012380xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012381 xmlParserCtxtPtr ctxt;
12382 xmlParserInputPtr input;
12383 xmlParserInputBufferPtr buf;
12384
12385 if (buffer == NULL)
12386 return(NULL);
12387 if (size <= 0)
12388 return(NULL);
12389
12390 ctxt = xmlNewParserCtxt();
12391 if (ctxt == NULL)
12392 return(NULL);
12393
Daniel Veillard53350552003-09-18 13:35:51 +000012394 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012395 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012396 if (buf == NULL) {
12397 xmlFreeParserCtxt(ctxt);
12398 return(NULL);
12399 }
Owen Taylor3473f882001-02-23 17:55:21 +000012400
12401 input = xmlNewInputStream(ctxt);
12402 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012403 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012404 xmlFreeParserCtxt(ctxt);
12405 return(NULL);
12406 }
12407
12408 input->filename = NULL;
12409 input->buf = buf;
12410 input->base = input->buf->buffer->content;
12411 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012412 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012413
12414 inputPush(ctxt, input);
12415 return(ctxt);
12416}
12417
Daniel Veillard81273902003-09-30 00:43:48 +000012418#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012419/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012420 * xmlSAXParseMemoryWithData:
12421 * @sax: the SAX handler block
12422 * @buffer: an pointer to a char array
12423 * @size: the size of the array
12424 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12425 * documents
12426 * @data: the userdata
12427 *
12428 * parse an XML in-memory block and use the given SAX function block
12429 * to handle the parsing callback. If sax is NULL, fallback to the default
12430 * DOM tree building routines.
12431 *
12432 * User data (void *) is stored within the parser context in the
12433 * context's _private member, so it is available nearly everywhere in libxml
12434 *
12435 * Returns the resulting document tree
12436 */
12437
12438xmlDocPtr
12439xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12440 int size, int recovery, void *data) {
12441 xmlDocPtr ret;
12442 xmlParserCtxtPtr ctxt;
12443
12444 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12445 if (ctxt == NULL) return(NULL);
12446 if (sax != NULL) {
12447 if (ctxt->sax != NULL)
12448 xmlFree(ctxt->sax);
12449 ctxt->sax = sax;
12450 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012451 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012452 if (data!=NULL) {
12453 ctxt->_private=data;
12454 }
12455
Daniel Veillardadba5f12003-04-04 16:09:01 +000012456 ctxt->recovery = recovery;
12457
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012458 xmlParseDocument(ctxt);
12459
12460 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12461 else {
12462 ret = NULL;
12463 xmlFreeDoc(ctxt->myDoc);
12464 ctxt->myDoc = NULL;
12465 }
12466 if (sax != NULL)
12467 ctxt->sax = NULL;
12468 xmlFreeParserCtxt(ctxt);
12469
12470 return(ret);
12471}
12472
12473/**
Owen Taylor3473f882001-02-23 17:55:21 +000012474 * xmlSAXParseMemory:
12475 * @sax: the SAX handler block
12476 * @buffer: an pointer to a char array
12477 * @size: the size of the array
12478 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12479 * documents
12480 *
12481 * parse an XML in-memory block and use the given SAX function block
12482 * to handle the parsing callback. If sax is NULL, fallback to the default
12483 * DOM tree building routines.
12484 *
12485 * Returns the resulting document tree
12486 */
12487xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012488xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12489 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012490 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012491}
12492
12493/**
12494 * xmlParseMemory:
12495 * @buffer: an pointer to a char array
12496 * @size: the size of the array
12497 *
12498 * parse an XML in-memory block and build a tree.
12499 *
12500 * Returns the resulting document tree
12501 */
12502
Daniel Veillard50822cb2001-07-26 20:05:51 +000012503xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012504 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12505}
12506
12507/**
12508 * xmlRecoverMemory:
12509 * @buffer: an pointer to a char array
12510 * @size: the size of the array
12511 *
12512 * parse an XML in-memory block and build a tree.
12513 * In the case the document is not Well Formed, a tree is built anyway
12514 *
12515 * Returns the resulting document tree
12516 */
12517
Daniel Veillard50822cb2001-07-26 20:05:51 +000012518xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012519 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12520}
12521
12522/**
12523 * xmlSAXUserParseMemory:
12524 * @sax: a SAX handler
12525 * @user_data: The user data returned on SAX callbacks
12526 * @buffer: an in-memory XML document input
12527 * @size: the length of the XML document in bytes
12528 *
12529 * A better SAX parsing routine.
12530 * parse an XML in-memory buffer and call the given SAX handler routines.
12531 *
12532 * Returns 0 in case of success or a error number otherwise
12533 */
12534int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012535 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012536 int ret = 0;
12537 xmlParserCtxtPtr ctxt;
12538 xmlSAXHandlerPtr oldsax = NULL;
12539
Daniel Veillard9e923512002-08-14 08:48:52 +000012540 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012541 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12542 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012543 oldsax = ctxt->sax;
12544 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012545 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012546 if (user_data != NULL)
12547 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012548
12549 xmlParseDocument(ctxt);
12550
12551 if (ctxt->wellFormed)
12552 ret = 0;
12553 else {
12554 if (ctxt->errNo != 0)
12555 ret = ctxt->errNo;
12556 else
12557 ret = -1;
12558 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012559 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012560 if (ctxt->myDoc != NULL) {
12561 xmlFreeDoc(ctxt->myDoc);
12562 ctxt->myDoc = NULL;
12563 }
Owen Taylor3473f882001-02-23 17:55:21 +000012564 xmlFreeParserCtxt(ctxt);
12565
12566 return ret;
12567}
Daniel Veillard81273902003-09-30 00:43:48 +000012568#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012569
12570/**
12571 * xmlCreateDocParserCtxt:
12572 * @cur: a pointer to an array of xmlChar
12573 *
12574 * Creates a parser context for an XML in-memory document.
12575 *
12576 * Returns the new parser context or NULL
12577 */
12578xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012579xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012580 int len;
12581
12582 if (cur == NULL)
12583 return(NULL);
12584 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012585 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012586}
12587
Daniel Veillard81273902003-09-30 00:43:48 +000012588#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012589/**
12590 * xmlSAXParseDoc:
12591 * @sax: the SAX handler block
12592 * @cur: a pointer to an array of xmlChar
12593 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12594 * documents
12595 *
12596 * parse an XML in-memory document and build a tree.
12597 * It use the given SAX function block to handle the parsing callback.
12598 * If sax is NULL, fallback to the default DOM tree building routines.
12599 *
12600 * Returns the resulting document tree
12601 */
12602
12603xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012604xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012605 xmlDocPtr ret;
12606 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012607 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012608
Daniel Veillard38936062004-11-04 17:45:11 +000012609 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012610
12611
12612 ctxt = xmlCreateDocParserCtxt(cur);
12613 if (ctxt == NULL) return(NULL);
12614 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012615 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012616 ctxt->sax = sax;
12617 ctxt->userData = NULL;
12618 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012619 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012620
12621 xmlParseDocument(ctxt);
12622 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12623 else {
12624 ret = NULL;
12625 xmlFreeDoc(ctxt->myDoc);
12626 ctxt->myDoc = NULL;
12627 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012628 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012629 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012630 xmlFreeParserCtxt(ctxt);
12631
12632 return(ret);
12633}
12634
12635/**
12636 * xmlParseDoc:
12637 * @cur: a pointer to an array of xmlChar
12638 *
12639 * parse an XML in-memory document and build a tree.
12640 *
12641 * Returns the resulting document tree
12642 */
12643
12644xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012645xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012646 return(xmlSAXParseDoc(NULL, cur, 0));
12647}
Daniel Veillard81273902003-09-30 00:43:48 +000012648#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012649
Daniel Veillard81273902003-09-30 00:43:48 +000012650#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012651/************************************************************************
12652 * *
12653 * Specific function to keep track of entities references *
12654 * and used by the XSLT debugger *
12655 * *
12656 ************************************************************************/
12657
12658static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12659
12660/**
12661 * xmlAddEntityReference:
12662 * @ent : A valid entity
12663 * @firstNode : A valid first node for children of entity
12664 * @lastNode : A valid last node of children entity
12665 *
12666 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12667 */
12668static void
12669xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12670 xmlNodePtr lastNode)
12671{
12672 if (xmlEntityRefFunc != NULL) {
12673 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12674 }
12675}
12676
12677
12678/**
12679 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012680 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012681 *
12682 * Set the function to call call back when a xml reference has been made
12683 */
12684void
12685xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12686{
12687 xmlEntityRefFunc = func;
12688}
Daniel Veillard81273902003-09-30 00:43:48 +000012689#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012690
12691/************************************************************************
12692 * *
12693 * Miscellaneous *
12694 * *
12695 ************************************************************************/
12696
12697#ifdef LIBXML_XPATH_ENABLED
12698#include <libxml/xpath.h>
12699#endif
12700
Daniel Veillardffa3c742005-07-21 13:24:09 +000012701extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012702static int xmlParserInitialized = 0;
12703
12704/**
12705 * xmlInitParser:
12706 *
12707 * Initialization function for the XML parser.
12708 * This is not reentrant. Call once before processing in case of
12709 * use in multithreaded programs.
12710 */
12711
12712void
12713xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012714 if (xmlParserInitialized != 0)
12715 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012716
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012717 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12718 (xmlGenericError == NULL))
12719 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012720 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012721 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012722 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012723 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012724 xmlDefaultSAXHandlerInit();
12725 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012726#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012727 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012728#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012729#ifdef LIBXML_HTML_ENABLED
12730 htmlInitAutoClose();
12731 htmlDefaultSAXHandlerInit();
12732#endif
12733#ifdef LIBXML_XPATH_ENABLED
12734 xmlXPathInit();
12735#endif
12736 xmlParserInitialized = 1;
12737}
12738
12739/**
12740 * xmlCleanupParser:
12741 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012742 * Cleanup function for the XML library. It tries to reclaim all
12743 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012744 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012745 * function should not prevent reusing the library but one should
12746 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012747 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012748 */
12749
12750void
12751xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012752 if (!xmlParserInitialized)
12753 return;
12754
Owen Taylor3473f882001-02-23 17:55:21 +000012755 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012756#ifdef LIBXML_CATALOG_ENABLED
12757 xmlCatalogCleanup();
12758#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012759 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012760 xmlCleanupInputCallbacks();
12761#ifdef LIBXML_OUTPUT_ENABLED
12762 xmlCleanupOutputCallbacks();
12763#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012764#ifdef LIBXML_SCHEMAS_ENABLED
12765 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012766 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012767#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012768 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012769 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012770 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012771 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012772 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012773}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012774
12775/************************************************************************
12776 * *
12777 * New set (2.6.0) of simpler and more flexible APIs *
12778 * *
12779 ************************************************************************/
12780
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can safely be used as the branch of an if/else.
 * Invoke it followed by a semicolon.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
12792
12793/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012794 * xmlCtxtReset:
12795 * @ctxt: an XML parser context
12796 *
12797 * Reset a parser context
12798 */
12799void
12800xmlCtxtReset(xmlParserCtxtPtr ctxt)
12801{
12802 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012803 xmlDictPtr dict;
12804
12805 if (ctxt == NULL)
12806 return;
12807
12808 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012809
12810 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12811 xmlFreeInputStream(input);
12812 }
12813 ctxt->inputNr = 0;
12814 ctxt->input = NULL;
12815
12816 ctxt->spaceNr = 0;
12817 ctxt->spaceTab[0] = -1;
12818 ctxt->space = &ctxt->spaceTab[0];
12819
12820
12821 ctxt->nodeNr = 0;
12822 ctxt->node = NULL;
12823
12824 ctxt->nameNr = 0;
12825 ctxt->name = NULL;
12826
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012827 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012828 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012829 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012830 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012831 DICT_FREE(ctxt->directory);
12832 ctxt->directory = NULL;
12833 DICT_FREE(ctxt->extSubURI);
12834 ctxt->extSubURI = NULL;
12835 DICT_FREE(ctxt->extSubSystem);
12836 ctxt->extSubSystem = NULL;
12837 if (ctxt->myDoc != NULL)
12838 xmlFreeDoc(ctxt->myDoc);
12839 ctxt->myDoc = NULL;
12840
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012841 ctxt->standalone = -1;
12842 ctxt->hasExternalSubset = 0;
12843 ctxt->hasPErefs = 0;
12844 ctxt->html = 0;
12845 ctxt->external = 0;
12846 ctxt->instate = XML_PARSER_START;
12847 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012848
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012849 ctxt->wellFormed = 1;
12850 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012851 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012852 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012853#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012854 ctxt->vctxt.userData = ctxt;
12855 ctxt->vctxt.error = xmlParserValidityError;
12856 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012857#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012858 ctxt->record_info = 0;
12859 ctxt->nbChars = 0;
12860 ctxt->checkIndex = 0;
12861 ctxt->inSubset = 0;
12862 ctxt->errNo = XML_ERR_OK;
12863 ctxt->depth = 0;
12864 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12865 ctxt->catalogs = NULL;
12866 xmlInitNodeInfoSeq(&ctxt->node_seq);
12867
12868 if (ctxt->attsDefault != NULL) {
12869 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12870 ctxt->attsDefault = NULL;
12871 }
12872 if (ctxt->attsSpecial != NULL) {
12873 xmlHashFree(ctxt->attsSpecial, NULL);
12874 ctxt->attsSpecial = NULL;
12875 }
12876
Daniel Veillard4432df22003-09-28 18:58:27 +000012877#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012878 if (ctxt->catalogs != NULL)
12879 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012880#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012881 if (ctxt->lastError.code != XML_ERR_OK)
12882 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012883}
12884
12885/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012886 * xmlCtxtResetPush:
12887 * @ctxt: an XML parser context
12888 * @chunk: a pointer to an array of chars
12889 * @size: number of chars in the array
12890 * @filename: an optional file name or URI
12891 * @encoding: the document encoding, or NULL
12892 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012893 * Reset a push parser context
12894 *
12895 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012896 */
12897int
12898xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12899 int size, const char *filename, const char *encoding)
12900{
12901 xmlParserInputPtr inputStream;
12902 xmlParserInputBufferPtr buf;
12903 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12904
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012905 if (ctxt == NULL)
12906 return(1);
12907
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012908 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12909 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12910
12911 buf = xmlAllocParserInputBuffer(enc);
12912 if (buf == NULL)
12913 return(1);
12914
12915 if (ctxt == NULL) {
12916 xmlFreeParserInputBuffer(buf);
12917 return(1);
12918 }
12919
12920 xmlCtxtReset(ctxt);
12921
12922 if (ctxt->pushTab == NULL) {
12923 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12924 sizeof(xmlChar *));
12925 if (ctxt->pushTab == NULL) {
12926 xmlErrMemory(ctxt, NULL);
12927 xmlFreeParserInputBuffer(buf);
12928 return(1);
12929 }
12930 }
12931
12932 if (filename == NULL) {
12933 ctxt->directory = NULL;
12934 } else {
12935 ctxt->directory = xmlParserGetDirectory(filename);
12936 }
12937
12938 inputStream = xmlNewInputStream(ctxt);
12939 if (inputStream == NULL) {
12940 xmlFreeParserInputBuffer(buf);
12941 return(1);
12942 }
12943
12944 if (filename == NULL)
12945 inputStream->filename = NULL;
12946 else
12947 inputStream->filename = (char *)
12948 xmlCanonicPath((const xmlChar *) filename);
12949 inputStream->buf = buf;
12950 inputStream->base = inputStream->buf->buffer->content;
12951 inputStream->cur = inputStream->buf->buffer->content;
12952 inputStream->end =
12953 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12954
12955 inputPush(ctxt, inputStream);
12956
12957 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12958 (ctxt->input->buf != NULL)) {
12959 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12960 int cur = ctxt->input->cur - ctxt->input->base;
12961
12962 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12963
12964 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12965 ctxt->input->cur = ctxt->input->base + cur;
12966 ctxt->input->end =
12967 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12968 use];
12969#ifdef DEBUG_PUSH
12970 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12971#endif
12972 }
12973
12974 if (encoding != NULL) {
12975 xmlCharEncodingHandlerPtr hdlr;
12976
12977 hdlr = xmlFindCharEncodingHandler(encoding);
12978 if (hdlr != NULL) {
12979 xmlSwitchToEncoding(ctxt, hdlr);
12980 } else {
12981 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12982 "Unsupported encoding %s\n", BAD_CAST encoding);
12983 }
12984 } else if (enc != XML_CHAR_ENCODING_NONE) {
12985 xmlSwitchEncoding(ctxt, enc);
12986 }
12987
12988 return(0);
12989}
12990
12991/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012992 * xmlCtxtUseOptions:
12993 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012994 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012995 *
12996 * Applies the options to the parser context
12997 *
12998 * Returns 0 in case of success, the set of unknown or unimplemented options
12999 * in case of error.
13000 */
13001int
13002xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13003{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013004 if (ctxt == NULL)
13005 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013006 if (options & XML_PARSE_RECOVER) {
13007 ctxt->recovery = 1;
13008 options -= XML_PARSE_RECOVER;
13009 } else
13010 ctxt->recovery = 0;
13011 if (options & XML_PARSE_DTDLOAD) {
13012 ctxt->loadsubset = XML_DETECT_IDS;
13013 options -= XML_PARSE_DTDLOAD;
13014 } else
13015 ctxt->loadsubset = 0;
13016 if (options & XML_PARSE_DTDATTR) {
13017 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13018 options -= XML_PARSE_DTDATTR;
13019 }
13020 if (options & XML_PARSE_NOENT) {
13021 ctxt->replaceEntities = 1;
13022 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13023 options -= XML_PARSE_NOENT;
13024 } else
13025 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013026 if (options & XML_PARSE_PEDANTIC) {
13027 ctxt->pedantic = 1;
13028 options -= XML_PARSE_PEDANTIC;
13029 } else
13030 ctxt->pedantic = 0;
13031 if (options & XML_PARSE_NOBLANKS) {
13032 ctxt->keepBlanks = 0;
13033 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13034 options -= XML_PARSE_NOBLANKS;
13035 } else
13036 ctxt->keepBlanks = 1;
13037 if (options & XML_PARSE_DTDVALID) {
13038 ctxt->validate = 1;
13039 if (options & XML_PARSE_NOWARNING)
13040 ctxt->vctxt.warning = NULL;
13041 if (options & XML_PARSE_NOERROR)
13042 ctxt->vctxt.error = NULL;
13043 options -= XML_PARSE_DTDVALID;
13044 } else
13045 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013046 if (options & XML_PARSE_NOWARNING) {
13047 ctxt->sax->warning = NULL;
13048 options -= XML_PARSE_NOWARNING;
13049 }
13050 if (options & XML_PARSE_NOERROR) {
13051 ctxt->sax->error = NULL;
13052 ctxt->sax->fatalError = NULL;
13053 options -= XML_PARSE_NOERROR;
13054 }
Daniel Veillard81273902003-09-30 00:43:48 +000013055#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013056 if (options & XML_PARSE_SAX1) {
13057 ctxt->sax->startElement = xmlSAX2StartElement;
13058 ctxt->sax->endElement = xmlSAX2EndElement;
13059 ctxt->sax->startElementNs = NULL;
13060 ctxt->sax->endElementNs = NULL;
13061 ctxt->sax->initialized = 1;
13062 options -= XML_PARSE_SAX1;
13063 }
Daniel Veillard81273902003-09-30 00:43:48 +000013064#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013065 if (options & XML_PARSE_NODICT) {
13066 ctxt->dictNames = 0;
13067 options -= XML_PARSE_NODICT;
13068 } else {
13069 ctxt->dictNames = 1;
13070 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013071 if (options & XML_PARSE_NOCDATA) {
13072 ctxt->sax->cdataBlock = NULL;
13073 options -= XML_PARSE_NOCDATA;
13074 }
13075 if (options & XML_PARSE_NSCLEAN) {
13076 ctxt->options |= XML_PARSE_NSCLEAN;
13077 options -= XML_PARSE_NSCLEAN;
13078 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013079 if (options & XML_PARSE_NONET) {
13080 ctxt->options |= XML_PARSE_NONET;
13081 options -= XML_PARSE_NONET;
13082 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013083 if (options & XML_PARSE_COMPACT) {
13084 ctxt->options |= XML_PARSE_COMPACT;
13085 options -= XML_PARSE_COMPACT;
13086 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013087 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013088 return (options);
13089}
13090
13091/**
13092 * xmlDoRead:
13093 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013094 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013095 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013096 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013097 * @reuse: keep the context for reuse
13098 *
13099 * Common front-end for the xmlRead functions
13100 *
13101 * Returns the resulting document tree or NULL
13102 */
13103static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013104xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13105 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013106{
13107 xmlDocPtr ret;
13108
13109 xmlCtxtUseOptions(ctxt, options);
13110 if (encoding != NULL) {
13111 xmlCharEncodingHandlerPtr hdlr;
13112
13113 hdlr = xmlFindCharEncodingHandler(encoding);
13114 if (hdlr != NULL)
13115 xmlSwitchToEncoding(ctxt, hdlr);
13116 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013117 if ((URL != NULL) && (ctxt->input != NULL) &&
13118 (ctxt->input->filename == NULL))
13119 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013120 xmlParseDocument(ctxt);
13121 if ((ctxt->wellFormed) || ctxt->recovery)
13122 ret = ctxt->myDoc;
13123 else {
13124 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013125 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013126 xmlFreeDoc(ctxt->myDoc);
13127 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013128 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013129 ctxt->myDoc = NULL;
13130 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013131 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013132 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013133
13134 return (ret);
13135}
13136
13137/**
13138 * xmlReadDoc:
13139 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013140 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013141 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013142 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013143 *
13144 * parse an XML in-memory document and build a tree.
13145 *
13146 * Returns the resulting document tree
13147 */
13148xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013149xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013150{
13151 xmlParserCtxtPtr ctxt;
13152
13153 if (cur == NULL)
13154 return (NULL);
13155
13156 ctxt = xmlCreateDocParserCtxt(cur);
13157 if (ctxt == NULL)
13158 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013159 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013160}
13161
13162/**
13163 * xmlReadFile:
13164 * @filename: a file or URL
13165 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013166 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013167 *
13168 * parse an XML file from the filesystem or the network.
13169 *
13170 * Returns the resulting document tree
13171 */
13172xmlDocPtr
13173xmlReadFile(const char *filename, const char *encoding, int options)
13174{
13175 xmlParserCtxtPtr ctxt;
13176
Daniel Veillard61b93382003-11-03 14:28:31 +000013177 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013178 if (ctxt == NULL)
13179 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013180 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013181}
13182
13183/**
13184 * xmlReadMemory:
13185 * @buffer: a pointer to a char array
13186 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013187 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013188 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013189 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013190 *
13191 * parse an XML in-memory document and build a tree.
13192 *
13193 * Returns the resulting document tree
13194 */
13195xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013196xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013197{
13198 xmlParserCtxtPtr ctxt;
13199
13200 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13201 if (ctxt == NULL)
13202 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013203 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013204}
13205
13206/**
13207 * xmlReadFd:
13208 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013209 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013210 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013211 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013212 *
13213 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013214 * NOTE that the file descriptor will not be closed when the
13215 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013216 *
13217 * Returns the resulting document tree
13218 */
13219xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013220xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013221{
13222 xmlParserCtxtPtr ctxt;
13223 xmlParserInputBufferPtr input;
13224 xmlParserInputPtr stream;
13225
13226 if (fd < 0)
13227 return (NULL);
13228
13229 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13230 if (input == NULL)
13231 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013232 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013233 ctxt = xmlNewParserCtxt();
13234 if (ctxt == NULL) {
13235 xmlFreeParserInputBuffer(input);
13236 return (NULL);
13237 }
13238 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13239 if (stream == NULL) {
13240 xmlFreeParserInputBuffer(input);
13241 xmlFreeParserCtxt(ctxt);
13242 return (NULL);
13243 }
13244 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013245 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246}
13247
13248/**
13249 * xmlReadIO:
13250 * @ioread: an I/O read function
13251 * @ioclose: an I/O close function
13252 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013253 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013254 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013255 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013256 *
13257 * parse an XML document from I/O functions and source and build a tree.
13258 *
13259 * Returns the resulting document tree
13260 */
13261xmlDocPtr
13262xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013263 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013264{
13265 xmlParserCtxtPtr ctxt;
13266 xmlParserInputBufferPtr input;
13267 xmlParserInputPtr stream;
13268
13269 if (ioread == NULL)
13270 return (NULL);
13271
13272 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13273 XML_CHAR_ENCODING_NONE);
13274 if (input == NULL)
13275 return (NULL);
13276 ctxt = xmlNewParserCtxt();
13277 if (ctxt == NULL) {
13278 xmlFreeParserInputBuffer(input);
13279 return (NULL);
13280 }
13281 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13282 if (stream == NULL) {
13283 xmlFreeParserInputBuffer(input);
13284 xmlFreeParserCtxt(ctxt);
13285 return (NULL);
13286 }
13287 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013288 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013289}
13290
13291/**
13292 * xmlCtxtReadDoc:
13293 * @ctxt: an XML parser context
13294 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013295 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013296 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013297 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013298 *
13299 * parse an XML in-memory document and build a tree.
13300 * This reuses the existing @ctxt parser context
13301 *
13302 * Returns the resulting document tree
13303 */
13304xmlDocPtr
13305xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013306 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013307{
13308 xmlParserInputPtr stream;
13309
13310 if (cur == NULL)
13311 return (NULL);
13312 if (ctxt == NULL)
13313 return (NULL);
13314
13315 xmlCtxtReset(ctxt);
13316
13317 stream = xmlNewStringInputStream(ctxt, cur);
13318 if (stream == NULL) {
13319 return (NULL);
13320 }
13321 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013322 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013323}
13324
13325/**
13326 * xmlCtxtReadFile:
13327 * @ctxt: an XML parser context
13328 * @filename: a file or URL
13329 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013330 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013331 *
13332 * parse an XML file from the filesystem or the network.
13333 * This reuses the existing @ctxt parser context
13334 *
13335 * Returns the resulting document tree
13336 */
13337xmlDocPtr
13338xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13339 const char *encoding, int options)
13340{
13341 xmlParserInputPtr stream;
13342
13343 if (filename == NULL)
13344 return (NULL);
13345 if (ctxt == NULL)
13346 return (NULL);
13347
13348 xmlCtxtReset(ctxt);
13349
Daniel Veillard29614c72004-11-26 10:47:26 +000013350 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013351 if (stream == NULL) {
13352 return (NULL);
13353 }
13354 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013355 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013356}
13357
13358/**
13359 * xmlCtxtReadMemory:
13360 * @ctxt: an XML parser context
13361 * @buffer: a pointer to a char array
13362 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013363 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013364 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013365 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013366 *
13367 * parse an XML in-memory document and build a tree.
13368 * This reuses the existing @ctxt parser context
13369 *
13370 * Returns the resulting document tree
13371 */
13372xmlDocPtr
13373xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013374 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013375{
13376 xmlParserInputBufferPtr input;
13377 xmlParserInputPtr stream;
13378
13379 if (ctxt == NULL)
13380 return (NULL);
13381 if (buffer == NULL)
13382 return (NULL);
13383
13384 xmlCtxtReset(ctxt);
13385
13386 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13387 if (input == NULL) {
13388 return(NULL);
13389 }
13390
13391 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13392 if (stream == NULL) {
13393 xmlFreeParserInputBuffer(input);
13394 return(NULL);
13395 }
13396
13397 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013398 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013399}
13400
13401/**
13402 * xmlCtxtReadFd:
13403 * @ctxt: an XML parser context
13404 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013405 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013406 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013407 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013408 *
13409 * parse an XML from a file descriptor and build a tree.
13410 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013411 * NOTE that the file descriptor will not be closed when the
13412 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013413 *
13414 * Returns the resulting document tree
13415 */
13416xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013417xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13418 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013419{
13420 xmlParserInputBufferPtr input;
13421 xmlParserInputPtr stream;
13422
13423 if (fd < 0)
13424 return (NULL);
13425 if (ctxt == NULL)
13426 return (NULL);
13427
13428 xmlCtxtReset(ctxt);
13429
13430
13431 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13432 if (input == NULL)
13433 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013434 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013435 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13436 if (stream == NULL) {
13437 xmlFreeParserInputBuffer(input);
13438 return (NULL);
13439 }
13440 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013441 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013442}
13443
13444/**
13445 * xmlCtxtReadIO:
13446 * @ctxt: an XML parser context
13447 * @ioread: an I/O read function
13448 * @ioclose: an I/O close function
13449 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013450 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013451 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013452 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013453 *
13454 * parse an XML document from I/O functions and source and build a tree.
13455 * This reuses the existing @ctxt parser context
13456 *
13457 * Returns the resulting document tree
13458 */
13459xmlDocPtr
13460xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13461 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013462 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013463 const char *encoding, int options)
13464{
13465 xmlParserInputBufferPtr input;
13466 xmlParserInputPtr stream;
13467
13468 if (ioread == NULL)
13469 return (NULL);
13470 if (ctxt == NULL)
13471 return (NULL);
13472
13473 xmlCtxtReset(ctxt);
13474
13475 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13476 XML_CHAR_ENCODING_NONE);
13477 if (input == NULL)
13478 return (NULL);
13479 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13480 if (stream == NULL) {
13481 xmlFreeParserInputBuffer(input);
13482 return (NULL);
13483 }
13484 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013485 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013486}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013487
13488#define bottom_parser
13489#include "elfgcchack.h"