blob: a426b7979cef53d2252cb90f7315a77f9a80da92 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000083/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000084 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000085 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
Owen Taylor3473f882001-02-23 17:55:21 +000099/*
Owen Taylor3473f882001-02-23 17:55:21 +0000100 * List of XML prefixed PI allowed by W3C specs
101 */
102
Daniel Veillardb44025c2001-10-11 22:55:55 +0000103static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000104 "xml-stylesheet",
105 NULL
106};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
622 * Returns a non-zero value if the feature exists.
623 * Returns zero (0) if the feature does not exist or an unknown
624 * feature is requested.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
810 default:
811 break;
812 }
813 return(0);
814}
815
816/************************************************************************
817 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000818 * SAX2 defaulted attributes handling *
819 * *
820 ************************************************************************/
821
822/**
823 * xmlDetectSAX2:
824 * @ctxt: an XML parser context
825 *
826 * Do the SAX2 detection and specific intialization
827 */
828static void
829xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
830 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000831#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000832 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
833 ((ctxt->sax->startElementNs != NULL) ||
834 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000835#else
836 ctxt->sax2 = 1;
837#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838
839 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
840 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
841 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000842 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
843 (ctxt->str_xml_ns == NULL)) {
844 xmlErrMemory(ctxt, NULL);
845 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000846}
847
Daniel Veillarde57ec792003-09-10 10:50:59 +0000848typedef struct _xmlDefAttrs xmlDefAttrs;
849typedef xmlDefAttrs *xmlDefAttrsPtr;
850struct _xmlDefAttrs {
851 int nbAttrs; /* number of defaulted attributes on that element */
852 int maxAttrs; /* the size of the array */
853 const xmlChar *values[4]; /* array of localname/prefix/values */
854};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000855
856/**
857 * xmlAddDefAttrs:
858 * @ctxt: an XML parser context
859 * @fullname: the element fullname
860 * @fullattr: the attribute fullname
861 * @value: the attribute value
862 *
863 * Add a defaulted attribute for an element
864 */
865static void
866xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
867 const xmlChar *fullname,
868 const xmlChar *fullattr,
869 const xmlChar *value) {
870 xmlDefAttrsPtr defaults;
871 int len;
872 const xmlChar *name;
873 const xmlChar *prefix;
874
875 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000876 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 if (ctxt->attsDefault == NULL)
878 goto mem_error;
879 }
880
881 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000882 * split the element name into prefix:localname , the string found
883 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 */
885 name = xmlSplitQName3(fullname, &len);
886 if (name == NULL) {
887 name = xmlDictLookup(ctxt->dict, fullname, -1);
888 prefix = NULL;
889 } else {
890 name = xmlDictLookup(ctxt->dict, name, -1);
891 prefix = xmlDictLookup(ctxt->dict, fullname, len);
892 }
893
894 /*
895 * make sure there is some storage
896 */
897 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
898 if (defaults == NULL) {
899 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000900 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000901 if (defaults == NULL)
902 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000903 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000904 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000905 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
906 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000907 xmlDefAttrsPtr temp;
908
909 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000910 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000911 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000912 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000914 defaults->maxAttrs *= 2;
915 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
916 }
917
918 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000919 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 * are within the DTD and hen not associated to namespace names.
921 */
922 name = xmlSplitQName3(fullattr, &len);
923 if (name == NULL) {
924 name = xmlDictLookup(ctxt->dict, fullattr, -1);
925 prefix = NULL;
926 } else {
927 name = xmlDictLookup(ctxt->dict, name, -1);
928 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
929 }
930
931 defaults->values[4 * defaults->nbAttrs] = name;
932 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
933 /* intern the string and precompute the end */
934 len = xmlStrlen(value);
935 value = xmlDictLookup(ctxt->dict, value, len);
936 defaults->values[4 * defaults->nbAttrs + 2] = value;
937 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
938 defaults->nbAttrs++;
939
940 return;
941
942mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000943 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000944 return;
945}
946
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000947/**
948 * xmlAddSpecialAttr:
949 * @ctxt: an XML parser context
950 * @fullname: the element fullname
951 * @fullattr: the attribute fullname
952 * @type: the attribute type
953 *
954 * Register that this attribute is not CDATA
955 */
956static void
957xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
958 const xmlChar *fullname,
959 const xmlChar *fullattr,
960 int type)
961{
962 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000963 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000964 if (ctxt->attsSpecial == NULL)
965 goto mem_error;
966 }
967
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000968 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
969 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 return;
971
972mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000973 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000974 return;
975}
976
Daniel Veillard4432df22003-09-28 18:58:27 +0000977/**
978 * xmlCheckLanguageID:
979 * @lang: pointer to the string value
980 *
981 * Checks that the value conforms to the LanguageID production:
982 *
983 * NOTE: this is somewhat deprecated, those productions were removed from
984 * the XML Second edition.
985 *
986 * [33] LanguageID ::= Langcode ('-' Subcode)*
987 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
988 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
989 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
990 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
991 * [38] Subcode ::= ([a-z] | [A-Z])+
992 *
993 * Returns 1 if correct 0 otherwise
994 **/
995int
996xmlCheckLanguageID(const xmlChar * lang)
997{
998 const xmlChar *cur = lang;
999
1000 if (cur == NULL)
1001 return (0);
1002 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1003 ((cur[0] == 'I') && (cur[1] == '-'))) {
1004 /*
1005 * IANA code
1006 */
1007 cur += 2;
1008 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1009 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1010 cur++;
1011 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1012 ((cur[0] == 'X') && (cur[1] == '-'))) {
1013 /*
1014 * User code
1015 */
1016 cur += 2;
1017 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1018 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1019 cur++;
1020 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1021 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1022 /*
1023 * ISO639
1024 */
1025 cur++;
1026 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1028 cur++;
1029 else
1030 return (0);
1031 } else
1032 return (0);
1033 while (cur[0] != 0) { /* non input consuming */
1034 if (cur[0] != '-')
1035 return (0);
1036 cur++;
1037 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1038 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1039 cur++;
1040 else
1041 return (0);
1042 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1043 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1044 cur++;
1045 }
1046 return (1);
1047}
1048
Owen Taylor3473f882001-02-23 17:55:21 +00001049/************************************************************************
1050 * *
1051 * Parser stacks related functions and macros *
1052 * *
1053 ************************************************************************/
1054
1055xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1056 const xmlChar ** str);
1057
Daniel Veillard0fb18932003-09-07 09:14:37 +00001058#ifdef SAX2
1059/**
1060 * nsPush:
1061 * @ctxt: an XML parser context
1062 * @prefix: the namespace prefix or NULL
1063 * @URL: the namespace name
1064 *
1065 * Pushes a new parser namespace on top of the ns stack
1066 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001067 * Returns -1 in case of error, -2 if the namespace should be discarded
1068 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001069 */
1070static int
1071nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1072{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001073 if (ctxt->options & XML_PARSE_NSCLEAN) {
1074 int i;
1075 for (i = 0;i < ctxt->nsNr;i += 2) {
1076 if (ctxt->nsTab[i] == prefix) {
1077 /* in scope */
1078 if (ctxt->nsTab[i + 1] == URL)
1079 return(-2);
1080 /* out of scope keep it */
1081 break;
1082 }
1083 }
1084 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001085 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1086 ctxt->nsMax = 10;
1087 ctxt->nsNr = 0;
1088 ctxt->nsTab = (const xmlChar **)
1089 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1090 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001091 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001092 ctxt->nsMax = 0;
1093 return (-1);
1094 }
1095 } else if (ctxt->nsNr >= ctxt->nsMax) {
1096 ctxt->nsMax *= 2;
1097 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001098 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001099 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1100 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001101 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001102 ctxt->nsMax /= 2;
1103 return (-1);
1104 }
1105 }
1106 ctxt->nsTab[ctxt->nsNr++] = prefix;
1107 ctxt->nsTab[ctxt->nsNr++] = URL;
1108 return (ctxt->nsNr);
1109}
1110/**
1111 * nsPop:
1112 * @ctxt: an XML parser context
1113 * @nr: the number to pop
1114 *
1115 * Pops the top @nr parser prefix/namespace from the ns stack
1116 *
1117 * Returns the number of namespaces removed
1118 */
1119static int
1120nsPop(xmlParserCtxtPtr ctxt, int nr)
1121{
1122 int i;
1123
1124 if (ctxt->nsTab == NULL) return(0);
1125 if (ctxt->nsNr < nr) {
1126 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1127 nr = ctxt->nsNr;
1128 }
1129 if (ctxt->nsNr <= 0)
1130 return (0);
1131
1132 for (i = 0;i < nr;i++) {
1133 ctxt->nsNr--;
1134 ctxt->nsTab[ctxt->nsNr] = NULL;
1135 }
1136 return(nr);
1137}
1138#endif
1139
1140static int
1141xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1142 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001144 int maxatts;
1145
1146 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001147 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001148 atts = (const xmlChar **)
1149 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001151 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001152 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1153 if (attallocs == NULL) goto mem_error;
1154 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001155 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 } else if (nr + 5 > ctxt->maxatts) {
1157 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001158 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1159 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001160 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1163 (maxatts / 5) * sizeof(int));
1164 if (attallocs == NULL) goto mem_error;
1165 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001166 ctxt->maxatts = maxatts;
1167 }
1168 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001170 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001171 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172}
1173
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001174/**
1175 * inputPush:
1176 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001177 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001178 *
1179 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001180 *
1181 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001183int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001184inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1185{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001186 if ((ctxt == NULL) || (value == NULL))
1187 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001188 if (ctxt->inputNr >= ctxt->inputMax) {
1189 ctxt->inputMax *= 2;
1190 ctxt->inputTab =
1191 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1192 ctxt->inputMax *
1193 sizeof(ctxt->inputTab[0]));
1194 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001195 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001196 return (0);
1197 }
1198 }
1199 ctxt->inputTab[ctxt->inputNr] = value;
1200 ctxt->input = value;
1201 return (ctxt->inputNr++);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001204 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001205 * @ctxt: an XML parser context
1206 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001207 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001208 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001209 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001210 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001211xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001212inputPop(xmlParserCtxtPtr ctxt)
1213{
1214 xmlParserInputPtr ret;
1215
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001216 if (ctxt == NULL)
1217 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001219 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001220 ctxt->inputNr--;
1221 if (ctxt->inputNr > 0)
1222 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1223 else
1224 ctxt->input = NULL;
1225 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001226 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001227 return (ret);
1228}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001229/**
1230 * nodePush:
1231 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001232 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001233 *
1234 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001235 *
1236 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001237 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001238int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001239nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1240{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001241 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001242 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001243 xmlNodePtr *tmp;
1244
1245 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1246 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001247 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001248 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001249 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001250 return (0);
1251 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001252 ctxt->nodeTab = tmp;
1253 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001255 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001256 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001257 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1258 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001259 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001260 return(0);
1261 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001262 ctxt->nodeTab[ctxt->nodeNr] = value;
1263 ctxt->node = value;
1264 return (ctxt->nodeNr++);
1265}
1266/**
1267 * nodePop:
1268 * @ctxt: an XML parser context
1269 *
1270 * Pops the top element node from the node stack
1271 *
1272 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001273 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001274xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001275nodePop(xmlParserCtxtPtr ctxt)
1276{
1277 xmlNodePtr ret;
1278
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001279 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001280 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001281 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001282 ctxt->nodeNr--;
1283 if (ctxt->nodeNr > 0)
1284 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1285 else
1286 ctxt->node = NULL;
1287 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001288 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001289 return (ret);
1290}
Daniel Veillarda2351322004-06-27 12:08:10 +00001291
1292#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001293/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001294 * nameNsPush:
1295 * @ctxt: an XML parser context
1296 * @value: the element name
1297 * @prefix: the element prefix
1298 * @URI: the element namespace name
1299 *
1300 * Pushes a new element name/prefix/URL on top of the name stack
1301 *
1302 * Returns -1 in case of error, the index in the stack otherwise
1303 */
1304static int
1305nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1306 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1307{
1308 if (ctxt->nameNr >= ctxt->nameMax) {
1309 const xmlChar * *tmp;
1310 void **tmp2;
1311 ctxt->nameMax *= 2;
1312 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1313 ctxt->nameMax *
1314 sizeof(ctxt->nameTab[0]));
1315 if (tmp == NULL) {
1316 ctxt->nameMax /= 2;
1317 goto mem_error;
1318 }
1319 ctxt->nameTab = tmp;
1320 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1321 ctxt->nameMax * 3 *
1322 sizeof(ctxt->pushTab[0]));
1323 if (tmp2 == NULL) {
1324 ctxt->nameMax /= 2;
1325 goto mem_error;
1326 }
1327 ctxt->pushTab = tmp2;
1328 }
1329 ctxt->nameTab[ctxt->nameNr] = value;
1330 ctxt->name = value;
1331 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1332 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001333 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001334 return (ctxt->nameNr++);
1335mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001336 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001337 return (-1);
1338}
1339/**
1340 * nameNsPop:
1341 * @ctxt: an XML parser context
1342 *
1343 * Pops the top element/prefix/URI name from the name stack
1344 *
1345 * Returns the name just removed
1346 */
1347static const xmlChar *
1348nameNsPop(xmlParserCtxtPtr ctxt)
1349{
1350 const xmlChar *ret;
1351
1352 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001353 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 ctxt->nameNr--;
1355 if (ctxt->nameNr > 0)
1356 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1357 else
1358 ctxt->name = NULL;
1359 ret = ctxt->nameTab[ctxt->nameNr];
1360 ctxt->nameTab[ctxt->nameNr] = NULL;
1361 return (ret);
1362}
Daniel Veillarda2351322004-06-27 12:08:10 +00001363#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001364
1365/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001366 * namePush:
1367 * @ctxt: an XML parser context
1368 * @value: the element name
1369 *
1370 * Pushes a new element name on top of the name stack
1371 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001372 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001373 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001374int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001375namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001376{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001377 if (ctxt == NULL) return (-1);
1378
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001380 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001382 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001383 ctxt->nameMax *
1384 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001385 if (tmp == NULL) {
1386 ctxt->nameMax /= 2;
1387 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001388 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001389 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001390 }
1391 ctxt->nameTab[ctxt->nameNr] = value;
1392 ctxt->name = value;
1393 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001394mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001395 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001396 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001397}
1398/**
1399 * namePop:
1400 * @ctxt: an XML parser context
1401 *
1402 * Pops the top element name from the name stack
1403 *
1404 * Returns the name just removed
1405 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001406const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001407namePop(xmlParserCtxtPtr ctxt)
1408{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001409 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001410
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001411 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1412 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413 ctxt->nameNr--;
1414 if (ctxt->nameNr > 0)
1415 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1416 else
1417 ctxt->name = NULL;
1418 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001419 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001420 return (ret);
1421}
Owen Taylor3473f882001-02-23 17:55:21 +00001422
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001423static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001424 if (ctxt->spaceNr >= ctxt->spaceMax) {
1425 ctxt->spaceMax *= 2;
1426 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1427 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1428 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001429 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001430 return(0);
1431 }
1432 }
1433 ctxt->spaceTab[ctxt->spaceNr] = val;
1434 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1435 return(ctxt->spaceNr++);
1436}
1437
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001438static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001439 int ret;
1440 if (ctxt->spaceNr <= 0) return(0);
1441 ctxt->spaceNr--;
1442 if (ctxt->spaceNr > 0)
1443 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1444 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001445 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001446 ret = ctxt->spaceTab[ctxt->spaceNr];
1447 ctxt->spaceTab[ctxt->spaceNr] = -1;
1448 return(ret);
1449}
1450
1451/*
1452 * Macros for accessing the content. Those should be used only by the parser,
1453 * and not exported.
1454 *
1455 * Dirty macros, i.e. one often need to make assumption on the context to
1456 * use them
1457 *
1458 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1459 * To be used with extreme caution since operations consuming
1460 * characters may move the input buffer to a different location !
1461 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1462 * This should be used internally by the parser
1463 * only to compare to ASCII values otherwise it would break when
1464 * running with UTF-8 encoding.
1465 * RAW same as CUR but in the input buffer, bypass any token
1466 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1469 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001470 * strings without newlines within the parser.
1471 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1472 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001473 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1474 *
1475 * NEXT Skip to the next character, this does the proper decoding
1476 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001477 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001478 * CUR_CHAR(l) returns the current unicode character (int), set l
1479 * to the number of xmlChars used for the encoding [0-5].
1480 * CUR_SCHAR same but operate on a string instead of the context
1481 * COPY_BUF copy the current unicode char to the target buffer, increment
1482 * the index
1483 * GROW, SHRINK handling of input buffers
1484 */
1485
/* Byte-level accessors on the current input of the parser context `ctxt`. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at address s are exactly
 * c1..cn. Every argument is parenthesized so that an expression such as
 * `p + 1` is evaluated as a whole before the cast to a byte pointer is
 * applied. Note that s is evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1508
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * character and column counters, then handle a PE reference or an exhausted
 * input at the new position. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but advances one byte at a time so that line and
 * column counters are kept exact across newlines. val is parenthesized in
 * the loop test so that compound expressions compare as a whole; it is
 * re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1531
Daniel Veillarda880b122003-04-21 21:36:41 +00001532#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001533 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1534 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001535 xmlSHRINK (ctxt);
1536
1537static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1538 xmlParserInputShrink(ctxt->input);
1539 if ((*ctxt->input->cur == 0) &&
1540 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1541 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001542 }
Owen Taylor3473f882001-02-23 17:55:21 +00001543
Daniel Veillarda880b122003-04-21 21:36:41 +00001544#define GROW if ((ctxt->progressive == 0) && \
1545 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001546 xmlGROW (ctxt);
1547
1548static void xmlGROW (xmlParserCtxtPtr ctxt) {
1549 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1550 if ((*ctxt->input->cur == 0) &&
1551 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1552 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001553}
Owen Taylor3473f882001-02-23 17:55:21 +00001554
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, updating column and character
 * counters, and try to grow the input when a 0 byte is reached.
 * Kept as a brace block (not do/while) so existing call sites still parse.
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/column, then handle a PE reference at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i, advancing i.
 * A length l of 1 stores a single byte; any other l goes through
 * xmlCopyCharMultiByte(). Expands to a bare if/else, so use it inside braces.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001581
1582/**
1583 * xmlSkipBlankChars:
1584 * @ctxt: the XML parser context
1585 *
1586 * skip all blanks character found at that point in the input streams.
1587 * It pops up finished entities in the process if allowable at that point.
1588 *
1589 * Returns the number of space chars skipped
1590 */
1591
1592int
1593xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001594 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001595
1596 /*
1597 * It's Okay to use CUR/NEXT here since all the blanks are on
1598 * the ASCII range.
1599 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1601 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001602 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001603 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001604 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001605 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001606 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001607 if (*cur == '\n') {
1608 ctxt->input->line++; ctxt->input->col = 1;
1609 }
1610 cur++;
1611 res++;
1612 if (*cur == 0) {
1613 ctxt->input->cur = cur;
1614 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1615 cur = ctxt->input->cur;
1616 }
1617 }
1618 ctxt->input->cur = cur;
1619 } else {
1620 int cur;
1621 do {
1622 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001623 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001624 NEXT;
1625 cur = CUR;
1626 res++;
1627 }
1628 while ((cur == 0) && (ctxt->inputNr > 1) &&
1629 (ctxt->instate != XML_PARSER_COMMENT)) {
1630 xmlPopInput(ctxt);
1631 cur = CUR;
1632 }
1633 /*
1634 * Need to handle support of entities branching here
1635 */
1636 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1637 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1638 }
Owen Taylor3473f882001-02-23 17:55:21 +00001639 return(res);
1640}
1641
1642/************************************************************************
1643 * *
1644 * Commodity functions to handle entities *
1645 * *
1646 ************************************************************************/
1647
1648/**
1649 * xmlPopInput:
1650 * @ctxt: an XML parser context
1651 *
1652 * xmlPopInput: the current input pointed by ctxt->input came to an end
1653 * pop it and return the next char.
1654 *
1655 * Returns the current xmlChar in the parser context
1656 */
1657xmlChar
1658xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001659 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001660 if (xmlParserDebugEntities)
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Popping input %d\n", ctxt->inputNr);
1663 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001664 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001665 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1666 return(xmlPopInput(ctxt));
1667 return(CUR);
1668}
1669
1670/**
1671 * xmlPushInput:
1672 * @ctxt: an XML parser context
1673 * @input: an XML parser input fragment (entity, XML fragment ...).
1674 *
1675 * xmlPushInput: switch to a new input stream which is stacked on top
1676 * of the previous one(s).
1677 */
1678void
1679xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1680 if (input == NULL) return;
1681
1682 if (xmlParserDebugEntities) {
1683 if ((ctxt->input != NULL) && (ctxt->input->filename))
1684 xmlGenericError(xmlGenericErrorContext,
1685 "%s(%d): ", ctxt->input->filename,
1686 ctxt->input->line);
1687 xmlGenericError(xmlGenericErrorContext,
1688 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1689 }
1690 inputPush(ctxt, input);
1691 GROW;
1692}
1693
1694/**
1695 * xmlParseCharRef:
1696 * @ctxt: an XML parser context
1697 *
1698 * parse Reference declarations
1699 *
1700 * [66] CharRef ::= '&#' [0-9]+ ';' |
1701 * '&#x' [0-9a-fA-F]+ ';'
1702 *
1703 * [ WFC: Legal Character ]
1704 * Characters referred to using character references must match the
1705 * production for Char.
1706 *
1707 * Returns the value parsed (as an int), 0 in case of error
1708 */
1709int
1710xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001711 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001712 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001713 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 /*
1716 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1717 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001718 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001719 (NXT(2) == 'x')) {
1720 SKIP(3);
1721 GROW;
1722 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001723 if (count++ > 20) {
1724 count = 0;
1725 GROW;
1726 }
1727 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001728 val = val * 16 + (CUR - '0');
1729 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1730 val = val * 16 + (CUR - 'a') + 10;
1731 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1732 val = val * 16 + (CUR - 'A') + 10;
1733 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001734 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001735 val = 0;
1736 break;
1737 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001738 if (val > 0x10FFFF)
1739 outofrange = val;
1740
Owen Taylor3473f882001-02-23 17:55:21 +00001741 NEXT;
1742 count++;
1743 }
1744 if (RAW == ';') {
1745 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001746 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001747 ctxt->nbChars ++;
1748 ctxt->input->cur++;
1749 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001750 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001751 SKIP(2);
1752 GROW;
1753 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001754 if (count++ > 20) {
1755 count = 0;
1756 GROW;
1757 }
1758 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001759 val = val * 10 + (CUR - '0');
1760 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001761 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001762 val = 0;
1763 break;
1764 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001765 if (val > 0x10FFFF)
1766 outofrange = val;
1767
Owen Taylor3473f882001-02-23 17:55:21 +00001768 NEXT;
1769 count++;
1770 }
1771 if (RAW == ';') {
1772 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001773 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001774 ctxt->nbChars ++;
1775 ctxt->input->cur++;
1776 }
1777 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001778 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001779 }
1780
1781 /*
1782 * [ WFC: Legal Character ]
1783 * Characters referred to using character references must match the
1784 * production for Char.
1785 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001786 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001787 return(val);
1788 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001789 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1790 "xmlParseCharRef: invalid xmlChar value %d\n",
1791 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001792 }
1793 return(0);
1794}
1795
1796/**
1797 * xmlParseStringCharRef:
1798 * @ctxt: an XML parser context
1799 * @str: a pointer to an index in the string
1800 *
1801 * parse Reference declarations, variant parsing from a string rather
1802 * than an an input flow.
1803 *
1804 * [66] CharRef ::= '&#' [0-9]+ ';' |
1805 * '&#x' [0-9a-fA-F]+ ';'
1806 *
1807 * [ WFC: Legal Character ]
1808 * Characters referred to using character references must match the
1809 * production for Char.
1810 *
1811 * Returns the value parsed (as an int), 0 in case of error, str will be
1812 * updated to the current value of the index
1813 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001814static int
Owen Taylor3473f882001-02-23 17:55:21 +00001815xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1816 const xmlChar *ptr;
1817 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001818 unsigned int val = 0;
1819 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001820
1821 if ((str == NULL) || (*str == NULL)) return(0);
1822 ptr = *str;
1823 cur = *ptr;
1824 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1825 ptr += 3;
1826 cur = *ptr;
1827 while (cur != ';') { /* Non input consuming loop */
1828 if ((cur >= '0') && (cur <= '9'))
1829 val = val * 16 + (cur - '0');
1830 else if ((cur >= 'a') && (cur <= 'f'))
1831 val = val * 16 + (cur - 'a') + 10;
1832 else if ((cur >= 'A') && (cur <= 'F'))
1833 val = val * 16 + (cur - 'A') + 10;
1834 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001835 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001836 val = 0;
1837 break;
1838 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001839 if (val > 0x10FFFF)
1840 outofrange = val;
1841
Owen Taylor3473f882001-02-23 17:55:21 +00001842 ptr++;
1843 cur = *ptr;
1844 }
1845 if (cur == ';')
1846 ptr++;
1847 } else if ((cur == '&') && (ptr[1] == '#')){
1848 ptr += 2;
1849 cur = *ptr;
1850 while (cur != ';') { /* Non input consuming loops */
1851 if ((cur >= '0') && (cur <= '9'))
1852 val = val * 10 + (cur - '0');
1853 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001854 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001855 val = 0;
1856 break;
1857 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001858 if (val > 0x10FFFF)
1859 outofrange = val;
1860
Owen Taylor3473f882001-02-23 17:55:21 +00001861 ptr++;
1862 cur = *ptr;
1863 }
1864 if (cur == ';')
1865 ptr++;
1866 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001867 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001868 return(0);
1869 }
1870 *str = ptr;
1871
1872 /*
1873 * [ WFC: Legal Character ]
1874 * Characters referred to using character references must match the
1875 * production for Char.
1876 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001877 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001878 return(val);
1879 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001880 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1881 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1882 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001883 }
1884 return(0);
1885}
1886
1887/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001888 * xmlNewBlanksWrapperInputStream:
1889 * @ctxt: an XML parser context
1890 * @entity: an Entity pointer
1891 *
1892 * Create a new input stream for wrapping
1893 * blanks around a PEReference
1894 *
1895 * Returns the new input stream or NULL
1896 */
1897
1898static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1899
Daniel Veillardf4862f02002-09-10 11:13:43 +00001900static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001901xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1902 xmlParserInputPtr input;
1903 xmlChar *buffer;
1904 size_t length;
1905 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001906 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1907 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001908 return(NULL);
1909 }
1910 if (xmlParserDebugEntities)
1911 xmlGenericError(xmlGenericErrorContext,
1912 "new blanks wrapper for entity: %s\n", entity->name);
1913 input = xmlNewInputStream(ctxt);
1914 if (input == NULL) {
1915 return(NULL);
1916 }
1917 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001918 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001919 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001921 return(NULL);
1922 }
1923 buffer [0] = ' ';
1924 buffer [1] = '%';
1925 buffer [length-3] = ';';
1926 buffer [length-2] = ' ';
1927 buffer [length-1] = 0;
1928 memcpy(buffer + 2, entity->name, length - 5);
1929 input->free = deallocblankswrapper;
1930 input->base = buffer;
1931 input->cur = buffer;
1932 input->length = length;
1933 input->end = &buffer[length];
1934 return(input);
1935}
1936
1937/**
Owen Taylor3473f882001-02-23 17:55:21 +00001938 * xmlParserHandlePEReference:
1939 * @ctxt: the parser context
1940 *
1941 * [69] PEReference ::= '%' Name ';'
1942 *
1943 * [ WFC: No Recursion ]
1944 * A parsed entity must not contain a recursive
1945 * reference to itself, either directly or indirectly.
1946 *
1947 * [ WFC: Entity Declared ]
1948 * In a document without any DTD, a document with only an internal DTD
1949 * subset which contains no parameter entity references, or a document
1950 * with "standalone='yes'", ... ... The declaration of a parameter
1951 * entity must precede any reference to it...
1952 *
1953 * [ VC: Entity Declared ]
1954 * In a document with an external subset or external parameter entities
1955 * with "standalone='no'", ... ... The declaration of a parameter entity
1956 * must precede any reference to it...
1957 *
1958 * [ WFC: In DTD ]
1959 * Parameter-entity references may only appear in the DTD.
1960 * NOTE: misleading but this is handled.
1961 *
1962 * A PEReference may have been detected in the current input stream
1963 * the handling is done accordingly to
1964 * http://www.w3.org/TR/REC-xml#entproc
1965 * i.e.
1966 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001967 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001968 */
1969void
1970xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001971 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001972 xmlEntityPtr entity = NULL;
1973 xmlParserInputPtr input;
1974
Owen Taylor3473f882001-02-23 17:55:21 +00001975 if (RAW != '%') return;
1976 switch(ctxt->instate) {
1977 case XML_PARSER_CDATA_SECTION:
1978 return;
1979 case XML_PARSER_COMMENT:
1980 return;
1981 case XML_PARSER_START_TAG:
1982 return;
1983 case XML_PARSER_END_TAG:
1984 return;
1985 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001986 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001987 return;
1988 case XML_PARSER_PROLOG:
1989 case XML_PARSER_START:
1990 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001991 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001992 return;
1993 case XML_PARSER_ENTITY_DECL:
1994 case XML_PARSER_CONTENT:
1995 case XML_PARSER_ATTRIBUTE_VALUE:
1996 case XML_PARSER_PI:
1997 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001998 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001999 /* we just ignore it there */
2000 return;
2001 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002002 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002003 return;
2004 case XML_PARSER_ENTITY_VALUE:
2005 /*
2006 * NOTE: in the case of entity values, we don't do the
2007 * substitution here since we need the literal
2008 * entity value to be able to save the internal
2009 * subset of the document.
2010 * This will be handled by xmlStringDecodeEntities
2011 */
2012 return;
2013 case XML_PARSER_DTD:
2014 /*
2015 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2016 * In the internal DTD subset, parameter-entity references
2017 * can occur only where markup declarations can occur, not
2018 * within markup declarations.
2019 * In that case this is handled in xmlParseMarkupDecl
2020 */
2021 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2022 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002023 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002024 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002025 break;
2026 case XML_PARSER_IGNORE:
2027 return;
2028 }
2029
2030 NEXT;
2031 name = xmlParseName(ctxt);
2032 if (xmlParserDebugEntities)
2033 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002034 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002035 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002036 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002037 } else {
2038 if (RAW == ';') {
2039 NEXT;
2040 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2041 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2042 if (entity == NULL) {
2043
2044 /*
2045 * [ WFC: Entity Declared ]
2046 * In a document without any DTD, a document with only an
2047 * internal DTD subset which contains no parameter entity
2048 * references, or a document with "standalone='yes'", ...
2049 * ... The declaration of a parameter entity must precede
2050 * any reference to it...
2051 */
2052 if ((ctxt->standalone == 1) ||
2053 ((ctxt->hasExternalSubset == 0) &&
2054 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002055 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002056 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002057 } else {
2058 /*
2059 * [ VC: Entity Declared ]
2060 * In a document with an external subset or external
2061 * parameter entities with "standalone='no'", ...
2062 * ... The declaration of a parameter entity must precede
2063 * any reference to it...
2064 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002065 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2066 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2067 "PEReference: %%%s; not found\n",
2068 name);
2069 } else
2070 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2071 "PEReference: %%%s; not found\n",
2072 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002073 ctxt->valid = 0;
2074 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002075 } else if (ctxt->input->free != deallocblankswrapper) {
2076 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2077 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002078 } else {
2079 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2080 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002081 xmlChar start[4];
2082 xmlCharEncoding enc;
2083
Owen Taylor3473f882001-02-23 17:55:21 +00002084 /*
2085 * handle the extra spaces added before and after
2086 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002087 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002088 */
2089 input = xmlNewEntityInputStream(ctxt, entity);
2090 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002091
2092 /*
2093 * Get the 4 first bytes and decode the charset
2094 * if enc != XML_CHAR_ENCODING_NONE
2095 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002096 * Note that, since we may have some non-UTF8
2097 * encoding (like UTF16, bug 135229), the 'length'
2098 * is not known, but we can calculate based upon
2099 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002100 */
2101 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002103 start[0] = RAW;
2104 start[1] = NXT(1);
2105 start[2] = NXT(2);
2106 start[3] = NXT(3);
2107 enc = xmlDetectCharEncoding(start, 4);
2108 if (enc != XML_CHAR_ENCODING_NONE) {
2109 xmlSwitchEncoding(ctxt, enc);
2110 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002111 }
2112
Owen Taylor3473f882001-02-23 17:55:21 +00002113 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002114 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2115 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002116 xmlParseTextDecl(ctxt);
2117 }
Owen Taylor3473f882001-02-23 17:55:21 +00002118 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002119 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2120 "PEReference: %s is not a parameter entity\n",
2121 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002122 }
2123 }
2124 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002125 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002126 }
Owen Taylor3473f882001-02-23 17:55:21 +00002127 }
2128}
2129
2130/*
2131 * Macro used to grow the current buffer.
2132 */
2133#define growBuffer(buffer) { \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002134 xmlChar *tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002135 buffer##_size *= 2; \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002136 tmp = (xmlChar *) \
Owen Taylor3473f882001-02-23 17:55:21 +00002137 xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002138 if (tmp == NULL) goto mem_error; \
2139 buffer = tmp; \
Owen Taylor3473f882001-02-23 17:55:21 +00002140}
2141
2142/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002143 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002144 * @ctxt: the parser context
2145 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002146 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002147 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2148 * @end: an end marker xmlChar, 0 if none
2149 * @end2: an end marker xmlChar, 0 if none
2150 * @end3: an end marker xmlChar, 0 if none
2151 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002152 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002153 *
2154 * [67] Reference ::= EntityRef | CharRef
2155 *
2156 * [69] PEReference ::= '%' Name ';'
2157 *
2158 * Returns A newly allocated string with the substitution done. The caller
2159 * must deallocate it !
2160 */
2161xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002162xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2163 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002164 xmlChar *buffer = NULL;
2165 int buffer_size = 0;
2166
2167 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002169 xmlEntityPtr ent;
2170 int c,l;
2171 int nbchars = 0;
2172
Daniel Veillarda82b1822004-11-08 16:24:57 +00002173 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002174 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002175 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002176
2177 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002178 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002179 return(NULL);
2180 }
2181
2182 /*
2183 * allocate a translation buffer.
2184 */
2185 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002186 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002187 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002188
2189 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002190 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002191 * we are operating on already parsed values.
2192 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002193 if (str < last)
2194 c = CUR_SCHAR(str, l);
2195 else
2196 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002197 while ((c != 0) && (c != end) && /* non input consuming loop */
2198 (c != end2) && (c != end3)) {
2199
2200 if (c == 0) break;
2201 if ((c == '&') && (str[1] == '#')) {
2202 int val = xmlParseStringCharRef(ctxt, &str);
2203 if (val != 0) {
2204 COPY_BUF(0,buffer,nbchars,val);
2205 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002206 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2207 growBuffer(buffer);
2208 }
Owen Taylor3473f882001-02-23 17:55:21 +00002209 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2210 if (xmlParserDebugEntities)
2211 xmlGenericError(xmlGenericErrorContext,
2212 "String decoding Entity Reference: %.30s\n",
2213 str);
2214 ent = xmlParseStringEntityRef(ctxt, &str);
2215 if ((ent != NULL) &&
2216 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2217 if (ent->content != NULL) {
2218 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002219 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2220 growBuffer(buffer);
2221 }
Owen Taylor3473f882001-02-23 17:55:21 +00002222 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002223 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002225 }
2226 } else if ((ent != NULL) && (ent->content != NULL)) {
2227 xmlChar *rep;
2228
2229 ctxt->depth++;
2230 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2231 0, 0, 0);
2232 ctxt->depth--;
2233 if (rep != NULL) {
2234 current = rep;
2235 while (*current != 0) { /* non input consuming loop */
2236 buffer[nbchars++] = *current++;
2237 if (nbchars >
2238 buffer_size - XML_PARSER_BUFFER_SIZE) {
2239 growBuffer(buffer);
2240 }
2241 }
2242 xmlFree(rep);
2243 }
2244 } else if (ent != NULL) {
2245 int i = xmlStrlen(ent->name);
2246 const xmlChar *cur = ent->name;
2247
2248 buffer[nbchars++] = '&';
2249 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2250 growBuffer(buffer);
2251 }
2252 for (;i > 0;i--)
2253 buffer[nbchars++] = *cur++;
2254 buffer[nbchars++] = ';';
2255 }
2256 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2257 if (xmlParserDebugEntities)
2258 xmlGenericError(xmlGenericErrorContext,
2259 "String decoding PE Reference: %.30s\n", str);
2260 ent = xmlParseStringPEReference(ctxt, &str);
2261 if (ent != NULL) {
2262 xmlChar *rep;
2263
2264 ctxt->depth++;
2265 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2266 0, 0, 0);
2267 ctxt->depth--;
2268 if (rep != NULL) {
2269 current = rep;
2270 while (*current != 0) { /* non input consuming loop */
2271 buffer[nbchars++] = *current++;
2272 if (nbchars >
2273 buffer_size - XML_PARSER_BUFFER_SIZE) {
2274 growBuffer(buffer);
2275 }
2276 }
2277 xmlFree(rep);
2278 }
2279 }
2280 } else {
2281 COPY_BUF(l,buffer,nbchars,c);
2282 str += l;
2283 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2284 growBuffer(buffer);
2285 }
2286 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002287 if (str < last)
2288 c = CUR_SCHAR(str, l);
2289 else
2290 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
2292 buffer[nbchars++] = 0;
2293 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002294
2295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002297 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298}
2299
Daniel Veillarde57ec792003-09-10 10:50:59 +00002300/**
2301 * xmlStringDecodeEntities:
2302 * @ctxt: the parser context
2303 * @str: the input string
2304 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2305 * @end: an end marker xmlChar, 0 if none
2306 * @end2: an end marker xmlChar, 0 if none
2307 * @end3: an end marker xmlChar, 0 if none
2308 *
2309 * Takes a entity string content and process to do the adequate substitutions.
2310 *
2311 * [67] Reference ::= EntityRef | CharRef
2312 *
2313 * [69] PEReference ::= '%' Name ';'
2314 *
2315 * Returns A newly allocated string with the substitution done. The caller
2316 * must deallocate it !
2317 */
2318xmlChar *
2319xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2320 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002321 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002322 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2323 end, end2, end3));
2324}
Owen Taylor3473f882001-02-23 17:55:21 +00002325
2326/************************************************************************
2327 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002328 * Commodity functions, cleanup needed ? *
2329 * *
2330 ************************************************************************/
2331
2332/**
2333 * areBlanks:
2334 * @ctxt: an XML parser context
2335 * @str: a xmlChar *
2336 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002337 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002338 *
2339 * Is this a sequence of blank chars that one can ignore ?
2340 *
2341 * Returns 1 if ignorable 0 otherwise.
2342 */
2343
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002344static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2345 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002346 int i, ret;
2347 xmlNodePtr lastChild;
2348
Daniel Veillard05c13a22001-09-09 08:38:09 +00002349 /*
2350 * Don't spend time trying to differentiate them, the same callback is
2351 * used !
2352 */
2353 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002354 return(0);
2355
Owen Taylor3473f882001-02-23 17:55:21 +00002356 /*
2357 * Check for xml:space value.
2358 */
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00002359 if ((ctxt->space == NULL) || (*(ctxt->space) == 1))
Owen Taylor3473f882001-02-23 17:55:21 +00002360 return(0);
2361
2362 /*
2363 * Check that the string is made of blanks
2364 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002365 if (blank_chars == 0) {
2366 for (i = 0;i < len;i++)
2367 if (!(IS_BLANK_CH(str[i]))) return(0);
2368 }
Owen Taylor3473f882001-02-23 17:55:21 +00002369
2370 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002371 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002372 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002373 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002374 if (ctxt->myDoc != NULL) {
2375 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2376 if (ret == 0) return(1);
2377 if (ret == 1) return(0);
2378 }
2379
2380 /*
2381 * Otherwise, heuristic :-\
2382 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002383 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002384 if ((ctxt->node->children == NULL) &&
2385 (RAW == '<') && (NXT(1) == '/')) return(0);
2386
2387 lastChild = xmlGetLastChild(ctxt->node);
2388 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002389 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2390 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 } else if (xmlNodeIsText(lastChild))
2392 return(0);
2393 else if ((ctxt->node->children != NULL) &&
2394 (xmlNodeIsText(ctxt->node->children)))
2395 return(0);
2396 return(1);
2397}
2398
Owen Taylor3473f882001-02-23 17:55:21 +00002399/************************************************************************
2400 * *
2401 * Extra stuff for namespace support *
2402 * Relates to http://www.w3.org/TR/WD-xml-names *
2403 * *
2404 ************************************************************************/
2405
2406/**
2407 * xmlSplitQName:
2408 * @ctxt: an XML parser context
2409 * @name: an XML parser context
2410 * @prefix: a xmlChar **
2411 *
2412 * parse an UTF8 encoded XML qualified name string
2413 *
2414 * [NS 5] QName ::= (Prefix ':')? LocalPart
2415 *
2416 * [NS 6] Prefix ::= NCName
2417 *
2418 * [NS 7] LocalPart ::= NCName
2419 *
2420 * Returns the local part, and prefix is updated
2421 * to get the Prefix if any.
2422 */
2423
2424xmlChar *
2425xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2426 xmlChar buf[XML_MAX_NAMELEN + 5];
2427 xmlChar *buffer = NULL;
2428 int len = 0;
2429 int max = XML_MAX_NAMELEN;
2430 xmlChar *ret = NULL;
2431 const xmlChar *cur = name;
2432 int c;
2433
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002434 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002435 *prefix = NULL;
2436
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002437 if (cur == NULL) return(NULL);
2438
Owen Taylor3473f882001-02-23 17:55:21 +00002439#ifndef XML_XML_NAMESPACE
2440 /* xml: prefix is not really a namespace */
2441 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2442 (cur[2] == 'l') && (cur[3] == ':'))
2443 return(xmlStrdup(name));
2444#endif
2445
Daniel Veillard597bc482003-07-24 16:08:28 +00002446 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002447 if (cur[0] == ':')
2448 return(xmlStrdup(name));
2449
2450 c = *cur++;
2451 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2452 buf[len++] = c;
2453 c = *cur++;
2454 }
2455 if (len >= max) {
2456 /*
2457 * Okay someone managed to make a huge name, so he's ready to pay
2458 * for the processing speed.
2459 */
2460 max = len * 2;
2461
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002462 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002463 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002464 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 return(NULL);
2466 }
2467 memcpy(buffer, buf, len);
2468 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2469 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002470 xmlChar *tmp;
2471
Owen Taylor3473f882001-02-23 17:55:21 +00002472 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002473 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002474 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002475 if (tmp == NULL) {
2476 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002477 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002478 return(NULL);
2479 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002481 }
2482 buffer[len++] = c;
2483 c = *cur++;
2484 }
2485 buffer[len] = 0;
2486 }
2487
Daniel Veillard597bc482003-07-24 16:08:28 +00002488 /* nasty but well=formed
2489 if ((c == ':') && (*cur == 0)) {
2490 return(xmlStrdup(name));
2491 } */
2492
Owen Taylor3473f882001-02-23 17:55:21 +00002493 if (buffer == NULL)
2494 ret = xmlStrndup(buf, len);
2495 else {
2496 ret = buffer;
2497 buffer = NULL;
2498 max = XML_MAX_NAMELEN;
2499 }
2500
2501
2502 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002503 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002504 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002505 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002506 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002507 }
Owen Taylor3473f882001-02-23 17:55:21 +00002508 len = 0;
2509
Daniel Veillardbb284f42002-10-16 18:02:47 +00002510 /*
2511 * Check that the first character is proper to start
2512 * a new name
2513 */
2514 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2515 ((c >= 0x41) && (c <= 0x5A)) ||
2516 (c == '_') || (c == ':'))) {
2517 int l;
2518 int first = CUR_SCHAR(cur, l);
2519
2520 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002521 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002522 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002523 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002524 }
2525 }
2526 cur++;
2527
Owen Taylor3473f882001-02-23 17:55:21 +00002528 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2529 buf[len++] = c;
2530 c = *cur++;
2531 }
2532 if (len >= max) {
2533 /*
2534 * Okay someone managed to make a huge name, so he's ready to pay
2535 * for the processing speed.
2536 */
2537 max = len * 2;
2538
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002539 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002540 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002541 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002542 return(NULL);
2543 }
2544 memcpy(buffer, buf, len);
2545 while (c != 0) { /* tested bigname2.xml */
2546 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002547 xmlChar *tmp;
2548
Owen Taylor3473f882001-02-23 17:55:21 +00002549 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002550 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002551 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002552 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002553 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002554 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002555 return(NULL);
2556 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002557 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
2559 buffer[len++] = c;
2560 c = *cur++;
2561 }
2562 buffer[len] = 0;
2563 }
2564
2565 if (buffer == NULL)
2566 ret = xmlStrndup(buf, len);
2567 else {
2568 ret = buffer;
2569 }
2570 }
2571
2572 return(ret);
2573}
2574
2575/************************************************************************
2576 * *
2577 * The parser itself *
2578 * Relates to http://www.w3.org/TR/REC-xml *
2579 * *
2580 ************************************************************************/
2581
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002582static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002583static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002584 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002585
Owen Taylor3473f882001-02-23 17:55:21 +00002586/**
2587 * xmlParseName:
2588 * @ctxt: an XML parser context
2589 *
2590 * parse an XML name.
2591 *
2592 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2593 * CombiningChar | Extender
2594 *
2595 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2596 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002597 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002598 *
2599 * Returns the Name parsed or NULL
2600 */
2601
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002602const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002603xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002604 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002605 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002606 int count = 0;
2607
2608 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002609
2610 /*
2611 * Accelerator for simple ASCII names
2612 */
2613 in = ctxt->input->cur;
2614 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2615 ((*in >= 0x41) && (*in <= 0x5A)) ||
2616 (*in == '_') || (*in == ':')) {
2617 in++;
2618 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2619 ((*in >= 0x41) && (*in <= 0x5A)) ||
2620 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002621 (*in == '_') || (*in == '-') ||
2622 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002623 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002624 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002625 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002626 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002627 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002628 ctxt->nbChars += count;
2629 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002630 if (ret == NULL)
2631 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 return(ret);
2633 }
2634 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002635 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002636}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002637
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638/**
2639 * xmlParseNameAndCompare:
2640 * @ctxt: an XML parser context
2641 *
2642 * parse an XML name and compares for match
2643 * (specialized for endtag parsing)
2644 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002645 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2646 * and the name for mismatch
2647 */
2648
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002649static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002650xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002651 register const xmlChar *cmp = other;
2652 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002653 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654
2655 GROW;
2656
2657 in = ctxt->input->cur;
2658 while (*in != 0 && *in == *cmp) {
2659 ++in;
2660 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002661 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002662 }
William M. Brack76e95df2003-10-18 16:20:14 +00002663 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002664 /* success */
2665 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002666 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002667 }
2668 /* failure (or end of input buffer), check with full function */
2669 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002670 /* strings coming from the dictionnary direct compare possible */
2671 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002672 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 }
2674 return ret;
2675}
2676
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002677static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002678xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002679 int len = 0, l;
2680 int c;
2681 int count = 0;
2682
2683 /*
2684 * Handler for more complex cases
2685 */
2686 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002687 c = CUR_CHAR(l);
2688 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2689 (!IS_LETTER(c) && (c != '_') &&
2690 (c != ':'))) {
2691 return(NULL);
2692 }
2693
2694 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002695 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002696 (c == '.') || (c == '-') ||
2697 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002698 (IS_COMBINING(c)) ||
2699 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002700 if (count++ > 100) {
2701 count = 0;
2702 GROW;
2703 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002704 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002705 NEXTL(l);
2706 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 }
Daniel Veillard96688262005-08-23 18:14:12 +00002708 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2709 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002710 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002711}
2712
2713/**
2714 * xmlParseStringName:
2715 * @ctxt: an XML parser context
2716 * @str: a pointer to the string pointer (IN/OUT)
2717 *
2718 * parse an XML name.
2719 *
2720 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2721 * CombiningChar | Extender
2722 *
2723 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2724 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002725 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002726 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002727 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002728 * is updated to the current location in the string.
2729 */
2730
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002731static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002732xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2733 xmlChar buf[XML_MAX_NAMELEN + 5];
2734 const xmlChar *cur = *str;
2735 int len = 0, l;
2736 int c;
2737
2738 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002739 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002740 (c != ':')) {
2741 return(NULL);
2742 }
2743
William M. Brack871611b2003-10-18 04:53:14 +00002744 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002745 (c == '.') || (c == '-') ||
2746 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002747 (IS_COMBINING(c)) ||
2748 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002749 COPY_BUF(l,buf,len,c);
2750 cur += l;
2751 c = CUR_SCHAR(cur, l);
2752 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2753 /*
2754 * Okay someone managed to make a huge name, so he's ready to pay
2755 * for the processing speed.
2756 */
2757 xmlChar *buffer;
2758 int max = len * 2;
2759
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002760 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002761 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002762 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002763 return(NULL);
2764 }
2765 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002766 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002767 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002768 (c == '.') || (c == '-') ||
2769 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002770 (IS_COMBINING(c)) ||
2771 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002772 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002773 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002775 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002776 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002777 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002778 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002779 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002780 return(NULL);
2781 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 }
2784 COPY_BUF(l,buffer,len,c);
2785 cur += l;
2786 c = CUR_SCHAR(cur, l);
2787 }
2788 buffer[len] = 0;
2789 *str = cur;
2790 return(buffer);
2791 }
2792 }
2793 *str = cur;
2794 return(xmlStrndup(buf, len));
2795}
2796
2797/**
2798 * xmlParseNmtoken:
2799 * @ctxt: an XML parser context
2800 *
2801 * parse an XML Nmtoken.
2802 *
2803 * [7] Nmtoken ::= (NameChar)+
2804 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002805 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002806 *
2807 * Returns the Nmtoken parsed or NULL
2808 */
2809
2810xmlChar *
2811xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2812 xmlChar buf[XML_MAX_NAMELEN + 5];
2813 int len = 0, l;
2814 int c;
2815 int count = 0;
2816
2817 GROW;
2818 c = CUR_CHAR(l);
2819
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 COPY_BUF(l,buf,len,c);
2830 NEXTL(l);
2831 c = CUR_CHAR(l);
2832 if (len >= XML_MAX_NAMELEN) {
2833 /*
2834 * Okay someone managed to make a huge token, so he's ready to pay
2835 * for the processing speed.
2836 */
2837 xmlChar *buffer;
2838 int max = len * 2;
2839
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002840 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002841 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002842 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002843 return(NULL);
2844 }
2845 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002846 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002847 (c == '.') || (c == '-') ||
2848 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002849 (IS_COMBINING(c)) ||
2850 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002851 if (count++ > 100) {
2852 count = 0;
2853 GROW;
2854 }
2855 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002856 xmlChar *tmp;
2857
Owen Taylor3473f882001-02-23 17:55:21 +00002858 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002859 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002860 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002861 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002862 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002863 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002864 return(NULL);
2865 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002866 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002867 }
2868 COPY_BUF(l,buffer,len,c);
2869 NEXTL(l);
2870 c = CUR_CHAR(l);
2871 }
2872 buffer[len] = 0;
2873 return(buffer);
2874 }
2875 }
2876 if (len == 0)
2877 return(NULL);
2878 return(xmlStrndup(buf, len));
2879}
2880
2881/**
2882 * xmlParseEntityValue:
2883 * @ctxt: an XML parser context
2884 * @orig: if non-NULL store a copy of the original entity value
2885 *
2886 * parse a value for ENTITY declarations
2887 *
2888 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2889 * "'" ([^%&'] | PEReference | Reference)* "'"
2890 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002891 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002892 */
2893
2894xmlChar *
2895xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2896 xmlChar *buf = NULL;
2897 int len = 0;
2898 int size = XML_PARSER_BUFFER_SIZE;
2899 int c, l;
2900 xmlChar stop;
2901 xmlChar *ret = NULL;
2902 const xmlChar *cur = NULL;
2903 xmlParserInputPtr input;
2904
2905 if (RAW == '"') stop = '"';
2906 else if (RAW == '\'') stop = '\'';
2907 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002908 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002909 return(NULL);
2910 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002911 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002912 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002913 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002914 return(NULL);
2915 }
2916
2917 /*
2918 * The content of the entity definition is copied in a buffer.
2919 */
2920
2921 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2922 input = ctxt->input;
2923 GROW;
2924 NEXT;
2925 c = CUR_CHAR(l);
2926 /*
2927 * NOTE: 4.4.5 Included in Literal
2928 * When a parameter entity reference appears in a literal entity
2929 * value, ... a single or double quote character in the replacement
2930 * text is always treated as a normal data character and will not
2931 * terminate the literal.
2932 * In practice it means we stop the loop only when back at parsing
2933 * the initial entity and the quote is found
2934 */
William M. Brack871611b2003-10-18 04:53:14 +00002935 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002936 (ctxt->input != input))) {
2937 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002938 xmlChar *tmp;
2939
Owen Taylor3473f882001-02-23 17:55:21 +00002940 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002941 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2942 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002943 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002944 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002945 return(NULL);
2946 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002948 }
2949 COPY_BUF(l,buf,len,c);
2950 NEXTL(l);
2951 /*
2952 * Pop-up of finished entities.
2953 */
2954 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2955 xmlPopInput(ctxt);
2956
2957 GROW;
2958 c = CUR_CHAR(l);
2959 if (c == 0) {
2960 GROW;
2961 c = CUR_CHAR(l);
2962 }
2963 }
2964 buf[len] = 0;
2965
2966 /*
2967 * Raise problem w.r.t. '&' and '%' being used in non-entities
2968 * reference constructs. Note Charref will be handled in
2969 * xmlStringDecodeEntities()
2970 */
2971 cur = buf;
2972 while (*cur != 0) { /* non input consuming */
2973 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2974 xmlChar *name;
2975 xmlChar tmp = *cur;
2976
2977 cur++;
2978 name = xmlParseStringName(ctxt, &cur);
2979 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002980 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002981 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002982 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002983 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002984 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2985 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002986 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002987 }
2988 if (name != NULL)
2989 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002990 if (*cur == 0)
2991 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
2993 cur++;
2994 }
2995
2996 /*
2997 * Then PEReference entities are substituted.
2998 */
2999 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003000 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003001 xmlFree(buf);
3002 } else {
3003 NEXT;
3004 /*
3005 * NOTE: 4.4.7 Bypassed
3006 * When a general entity reference appears in the EntityValue in
3007 * an entity declaration, it is bypassed and left as is.
3008 * so XML_SUBSTITUTE_REF is not set here.
3009 */
3010 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3011 0, 0, 0);
3012 if (orig != NULL)
3013 *orig = buf;
3014 else
3015 xmlFree(buf);
3016 }
3017
3018 return(ret);
3019}
3020
3021/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003022 * xmlParseAttValueComplex:
3023 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003024 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003025 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003026 *
3027 * parse a value for an attribute, this is the fallback function
3028 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003029 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003030 *
3031 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3032 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003033static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003034xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003035 xmlChar limit = 0;
3036 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003037 int len = 0;
3038 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003039 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003040 xmlChar *current = NULL;
3041 xmlEntityPtr ent;
3042
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (NXT(0) == '"') {
3044 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3045 limit = '"';
3046 NEXT;
3047 } else if (NXT(0) == '\'') {
3048 limit = '\'';
3049 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3050 NEXT;
3051 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003052 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003053 return(NULL);
3054 }
3055
3056 /*
3057 * allocate a translation buffer.
3058 */
3059 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003060 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003061 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003062
3063 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003064 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003065 */
3066 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003067 while ((NXT(0) != limit) && /* checked */
3068 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003069 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003070 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003071 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003072 if (NXT(1) == '#') {
3073 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003074
Owen Taylor3473f882001-02-23 17:55:21 +00003075 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003076 if (ctxt->replaceEntities) {
3077 if (len > buf_size - 10) {
3078 growBuffer(buf);
3079 }
3080 buf[len++] = '&';
3081 } else {
3082 /*
3083 * The reparsing will be done in xmlStringGetNodeList()
3084 * called by the attribute() function in SAX.c
3085 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003089 buf[len++] = '&';
3090 buf[len++] = '#';
3091 buf[len++] = '3';
3092 buf[len++] = '8';
3093 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003094 }
3095 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003096 if (len > buf_size - 10) {
3097 growBuffer(buf);
3098 }
Owen Taylor3473f882001-02-23 17:55:21 +00003099 len += xmlCopyChar(0, &buf[len], val);
3100 }
3101 } else {
3102 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003103 if ((ent != NULL) &&
3104 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
3108 if ((ctxt->replaceEntities == 0) &&
3109 (ent->content[0] == '&')) {
3110 buf[len++] = '&';
3111 buf[len++] = '#';
3112 buf[len++] = '3';
3113 buf[len++] = '8';
3114 buf[len++] = ';';
3115 } else {
3116 buf[len++] = ent->content[0];
3117 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003118 } else if ((ent != NULL) &&
3119 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003120 xmlChar *rep;
3121
3122 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3123 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003124 XML_SUBSTITUTE_REF,
3125 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003126 if (rep != NULL) {
3127 current = rep;
3128 while (*current != 0) { /* non input consuming */
3129 buf[len++] = *current++;
3130 if (len > buf_size - 10) {
3131 growBuffer(buf);
3132 }
3133 }
3134 xmlFree(rep);
3135 }
3136 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003137 if (len > buf_size - 10) {
3138 growBuffer(buf);
3139 }
Owen Taylor3473f882001-02-23 17:55:21 +00003140 if (ent->content != NULL)
3141 buf[len++] = ent->content[0];
3142 }
3143 } else if (ent != NULL) {
3144 int i = xmlStrlen(ent->name);
3145 const xmlChar *cur = ent->name;
3146
3147 /*
3148 * This may look absurd but is needed to detect
3149 * entities problems
3150 */
3151 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3152 (ent->content != NULL)) {
3153 xmlChar *rep;
3154 rep = xmlStringDecodeEntities(ctxt, ent->content,
3155 XML_SUBSTITUTE_REF, 0, 0, 0);
3156 if (rep != NULL)
3157 xmlFree(rep);
3158 }
3159
3160 /*
3161 * Just output the reference
3162 */
3163 buf[len++] = '&';
3164 if (len > buf_size - i - 10) {
3165 growBuffer(buf);
3166 }
3167 for (;i > 0;i--)
3168 buf[len++] = *cur++;
3169 buf[len++] = ';';
3170 }
3171 }
3172 } else {
3173 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003174 if ((len != 0) || (!normalize)) {
3175 if ((!normalize) || (!in_space)) {
3176 COPY_BUF(l,buf,len,0x20);
3177 if (len > buf_size - 10) {
3178 growBuffer(buf);
3179 }
3180 }
3181 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003182 }
3183 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003184 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003185 COPY_BUF(l,buf,len,c);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 NEXTL(l);
3191 }
3192 GROW;
3193 c = CUR_CHAR(l);
3194 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003195 if ((in_space) && (normalize)) {
3196 while (buf[len - 1] == 0x20) len--;
3197 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003198 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003199 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003200 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003201 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003202 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3203 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003204 } else
3205 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003206 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003207 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003208
3209mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003210 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003211 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003212}
3213
3214/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003215 * xmlParseAttValue:
3216 * @ctxt: an XML parser context
3217 *
3218 * parse a value for an attribute
3219 * Note: the parser won't do substitution of entities here, this
3220 * will be handled later in xmlStringGetNodeList
3221 *
3222 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3223 * "'" ([^<&'] | Reference)* "'"
3224 *
3225 * 3.3.3 Attribute-Value Normalization:
3226 * Before the value of an attribute is passed to the application or
3227 * checked for validity, the XML processor must normalize it as follows:
3228 * - a character reference is processed by appending the referenced
3229 * character to the attribute value
3230 * - an entity reference is processed by recursively processing the
3231 * replacement text of the entity
3232 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3233 * appending #x20 to the normalized value, except that only a single
3234 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3235 * parsed entity or the literal entity value of an internal parsed entity
3236 * - other characters are processed by appending them to the normalized value
3237 * If the declared value is not CDATA, then the XML processor must further
3238 * process the normalized attribute value by discarding any leading and
3239 * trailing space (#x20) characters, and by replacing sequences of space
3240 * (#x20) characters by a single space (#x20) character.
3241 * All attributes for which no declaration has been read should be treated
3242 * by a non-validating parser as if declared CDATA.
3243 *
3244 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3245 */
3246
3247
3248xmlChar *
3249xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003250 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003251 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003252}
3253
3254/**
Owen Taylor3473f882001-02-23 17:55:21 +00003255 * xmlParseSystemLiteral:
3256 * @ctxt: an XML parser context
3257 *
3258 * parse an XML Literal
3259 *
3260 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3261 *
3262 * Returns the SystemLiteral parsed or NULL
3263 */
3264
3265xmlChar *
3266xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3267 xmlChar *buf = NULL;
3268 int len = 0;
3269 int size = XML_PARSER_BUFFER_SIZE;
3270 int cur, l;
3271 xmlChar stop;
3272 int state = ctxt->instate;
3273 int count = 0;
3274
3275 SHRINK;
3276 if (RAW == '"') {
3277 NEXT;
3278 stop = '"';
3279 } else if (RAW == '\'') {
3280 NEXT;
3281 stop = '\'';
3282 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003283 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003284 return(NULL);
3285 }
3286
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003287 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003288 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003289 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003290 return(NULL);
3291 }
3292 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3293 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003294 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003295 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003296 xmlChar *tmp;
3297
Owen Taylor3473f882001-02-23 17:55:21 +00003298 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003299 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3300 if (tmp == NULL) {
3301 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003302 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003303 ctxt->instate = (xmlParserInputState) state;
3304 return(NULL);
3305 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003306 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003307 }
3308 count++;
3309 if (count > 50) {
3310 GROW;
3311 count = 0;
3312 }
3313 COPY_BUF(l,buf,len,cur);
3314 NEXTL(l);
3315 cur = CUR_CHAR(l);
3316 if (cur == 0) {
3317 GROW;
3318 SHRINK;
3319 cur = CUR_CHAR(l);
3320 }
3321 }
3322 buf[len] = 0;
3323 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003324 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003325 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003326 } else {
3327 NEXT;
3328 }
3329 return(buf);
3330}
3331
3332/**
3333 * xmlParsePubidLiteral:
3334 * @ctxt: an XML parser context
3335 *
3336 * parse an XML public literal
3337 *
3338 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3339 *
3340 * Returns the PubidLiteral parsed or NULL.
3341 */
3342
3343xmlChar *
3344xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3345 xmlChar *buf = NULL;
3346 int len = 0;
3347 int size = XML_PARSER_BUFFER_SIZE;
3348 xmlChar cur;
3349 xmlChar stop;
3350 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003351 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003352
3353 SHRINK;
3354 if (RAW == '"') {
3355 NEXT;
3356 stop = '"';
3357 } else if (RAW == '\'') {
3358 NEXT;
3359 stop = '\'';
3360 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003361 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003362 return(NULL);
3363 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003364 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003365 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003366 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003367 return(NULL);
3368 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003369 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003370 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003371 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003372 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003373 xmlChar *tmp;
3374
Owen Taylor3473f882001-02-23 17:55:21 +00003375 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003376 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3377 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003378 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003379 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003380 return(NULL);
3381 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003383 }
3384 buf[len++] = cur;
3385 count++;
3386 if (count > 50) {
3387 GROW;
3388 count = 0;
3389 }
3390 NEXT;
3391 cur = CUR;
3392 if (cur == 0) {
3393 GROW;
3394 SHRINK;
3395 cur = CUR;
3396 }
3397 }
3398 buf[len] = 0;
3399 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003400 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003401 } else {
3402 NEXT;
3403 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003404 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003405 return(buf);
3406}
3407
Daniel Veillard48b2f892001-02-25 16:11:03 +00003408void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003409
/*
 * Lookup table driving the inner loop of the fast character data scanner.
 * A non-zero entry means the byte can be skipped over directly; a zero
 * entry stops the scan. The zero entries are:
 *   - the control range 0x00-0x1F except TAB (0x09), so LF and CR stop
 *     the scan and are handled separately,
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - every byte from 0x80 up (non-ASCII).
 * Each non-zero entry holds the byte's own value.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3447
Owen Taylor3473f882001-02-23 17:55:21 +00003448/**
3449 * xmlParseCharData:
3450 * @ctxt: an XML parser context
3451 * @cdata: int indicating whether we are within a CDATA section
3452 *
3453 * parse a CharData section.
3454 * if we are within a CDATA section ']]>' marks an end of section.
3455 *
3456 * The right angle bracket (>) may be represented using the string "&gt;",
3457 * and must, for compatibility, be escaped using "&gt;" or a character
3458 * reference when it appears in the string "]]>" in content, when that
3459 * string is not marking the end of a CDATA section.
3460 *
3461 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3462 */
3463
3464void
3465xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003466 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003467 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003468 int line = ctxt->input->line;
3469 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003470 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003471
3472 SHRINK;
3473 GROW;
3474 /*
3475 * Accelerated common case where input don't need to be
3476 * modified before passing it to the handler.
3477 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003478 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003479 in = ctxt->input->cur;
3480 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003481get_more_space:
3482 while (*in == 0x20) in++;
3483 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003484 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003485 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003486 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003487 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003488 goto get_more_space;
3489 }
3490 if (*in == '<') {
3491 nbchar = in - ctxt->input->cur;
3492 if (nbchar > 0) {
3493 const xmlChar *tmp = ctxt->input->cur;
3494 ctxt->input->cur = in;
3495
Daniel Veillard34099b42004-11-04 17:34:35 +00003496 if ((ctxt->sax != NULL) &&
3497 (ctxt->sax->ignorableWhitespace !=
3498 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003499 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003500 if (ctxt->sax->ignorableWhitespace != NULL)
3501 ctxt->sax->ignorableWhitespace(ctxt->userData,
3502 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003503 } else if (ctxt->sax->characters != NULL)
3504 ctxt->sax->characters(ctxt->userData,
3505 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003506 } else if ((ctxt->sax != NULL) &&
3507 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003508 ctxt->sax->characters(ctxt->userData,
3509 tmp, nbchar);
3510 }
3511 }
3512 return;
3513 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003514
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003515get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003516 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003517 while (test_char_data[*in]) {
3518 in++;
3519 ccol++;
3520 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003521 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003522 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003523 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003524 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003525 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003526 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003527 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003528 }
3529 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003530 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003531 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003532 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003533 return;
3534 }
3535 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003536 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003537 goto get_more;
3538 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003539 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003540 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003541 if ((ctxt->sax != NULL) &&
3542 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003543 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003544 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003545 const xmlChar *tmp = ctxt->input->cur;
3546 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003547
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003548 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003549 if (ctxt->sax->ignorableWhitespace != NULL)
3550 ctxt->sax->ignorableWhitespace(ctxt->userData,
3551 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003552 } else if (ctxt->sax->characters != NULL)
3553 ctxt->sax->characters(ctxt->userData,
3554 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003555 line = ctxt->input->line;
3556 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003557 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003558 if (ctxt->sax->characters != NULL)
3559 ctxt->sax->characters(ctxt->userData,
3560 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003561 line = ctxt->input->line;
3562 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003563 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003564 }
3565 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003566 if (*in == 0xD) {
3567 in++;
3568 if (*in == 0xA) {
3569 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003570 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003571 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003572 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003573 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003574 in--;
3575 }
3576 if (*in == '<') {
3577 return;
3578 }
3579 if (*in == '&') {
3580 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003581 }
3582 SHRINK;
3583 GROW;
3584 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003585 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003586 nbchar = 0;
3587 }
Daniel Veillard50582112001-03-26 22:52:16 +00003588 ctxt->input->line = line;
3589 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003590 xmlParseCharDataComplex(ctxt, cdata);
3591}
3592
Daniel Veillard01c13b52002-12-10 15:19:08 +00003593/**
3594 * xmlParseCharDataComplex:
3595 * @ctxt: an XML parser context
3596 * @cdata: int indicating whether we are within a CDATA section
3597 *
3598 * parse a CharData section.this is the fallback function
3599 * of xmlParseCharData() when the parsing requires handling
3600 * of non-ASCII characters.
3601 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003602void
3603xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003604 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3605 int nbchar = 0;
3606 int cur, l;
3607 int count = 0;
3608
3609 SHRINK;
3610 GROW;
3611 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003612 while ((cur != '<') && /* checked */
3613 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003614 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003615 if ((cur == ']') && (NXT(1) == ']') &&
3616 (NXT(2) == '>')) {
3617 if (cdata) break;
3618 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003619 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003620 }
3621 }
3622 COPY_BUF(l,buf,nbchar,cur);
3623 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003624 buf[nbchar] = 0;
3625
Owen Taylor3473f882001-02-23 17:55:21 +00003626 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003627 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003628 */
3629 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003630 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003631 if (ctxt->sax->ignorableWhitespace != NULL)
3632 ctxt->sax->ignorableWhitespace(ctxt->userData,
3633 buf, nbchar);
3634 } else {
3635 if (ctxt->sax->characters != NULL)
3636 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3637 }
3638 }
3639 nbchar = 0;
3640 }
3641 count++;
3642 if (count > 50) {
3643 GROW;
3644 count = 0;
3645 }
3646 NEXTL(l);
3647 cur = CUR_CHAR(l);
3648 }
3649 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003650 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003651 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003652 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003653 */
3654 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003655 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003656 if (ctxt->sax->ignorableWhitespace != NULL)
3657 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3658 } else {
3659 if (ctxt->sax->characters != NULL)
3660 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3661 }
3662 }
3663 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003664 if ((cur != 0) && (!IS_CHAR(cur))) {
3665 /* Generate the error and skip the offending character */
3666 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3667 "PCDATA invalid Char value %d\n",
3668 cur);
3669 NEXTL(l);
3670 }
Owen Taylor3473f882001-02-23 17:55:21 +00003671}
3672
3673/**
3674 * xmlParseExternalID:
3675 * @ctxt: an XML parser context
3676 * @publicID: a xmlChar** receiving PubidLiteral
3677 * @strict: indicate whether we should restrict parsing to only
3678 * production [75], see NOTE below
3679 *
3680 * Parse an External ID or a Public ID
3681 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003682 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003683 * 'PUBLIC' S PubidLiteral S SystemLiteral
3684 *
3685 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3686 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3687 *
3688 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3689 *
3690 * Returns the function returns SystemLiteral and in the second
3691 * case publicID receives PubidLiteral, is strict is off
3692 * it is possible to return NULL and have publicID set.
3693 */
3694
3695xmlChar *
3696xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3697 xmlChar *URI = NULL;
3698
3699 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003700
3701 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003702 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003703 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003704 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003705 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3706 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003707 }
3708 SKIP_BLANKS;
3709 URI = xmlParseSystemLiteral(ctxt);
3710 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003711 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003712 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003713 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003714 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003715 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003717 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003718 }
3719 SKIP_BLANKS;
3720 *publicID = xmlParsePubidLiteral(ctxt);
3721 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003722 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003723 }
3724 if (strict) {
3725 /*
3726 * We don't handle [83] so "S SystemLiteral" is required.
3727 */
William M. Brack76e95df2003-10-18 16:20:14 +00003728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003730 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 } else {
3733 /*
3734 * We handle [83] so we return immediately, if
3735 * "S SystemLiteral" is not detected. From a purely parsing
3736 * point of view that's a nice mess.
3737 */
3738 const xmlChar *ptr;
3739 GROW;
3740
3741 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003742 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003743
William M. Brack76e95df2003-10-18 16:20:14 +00003744 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3746 }
3747 SKIP_BLANKS;
3748 URI = xmlParseSystemLiteral(ctxt);
3749 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003750 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003751 }
3752 }
3753 return(URI);
3754}
3755
3756/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003757 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003758 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003759 * @buf: the already parsed part of the buffer
3760 * @len: number of bytes filles in the buffer
3761 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003762 *
3763 * Skip an XML (SGML) comment <!-- .... -->
3764 * The spec says that "For compatibility, the string "--" (double-hyphen)
3765 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003766 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003767 *
3768 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3769 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003770static void
3771xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003772 int q, ql;
3773 int r, rl;
3774 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 xmlParserInputPtr input = ctxt->input;
3776 int count = 0;
3777
Owen Taylor3473f882001-02-23 17:55:21 +00003778 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003779 len = 0;
3780 size = XML_PARSER_BUFFER_SIZE;
3781 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3782 if (buf == NULL) {
3783 xmlErrMemory(ctxt, NULL);
3784 return;
3785 }
Owen Taylor3473f882001-02-23 17:55:21 +00003786 }
3787 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003788 if (q == 0)
3789 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003790 NEXTL(ql);
3791 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003792 if (r == 0)
3793 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003794 NEXTL(rl);
3795 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003796 if (cur == 0)
3797 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003798 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003799 ((cur != '>') ||
3800 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003801 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003802 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003803 }
3804 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003805 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003806 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003807 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3808 if (new_buf == NULL) {
3809 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003811 return;
3812 }
William M. Bracka3215c72004-07-31 16:24:01 +00003813 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 COPY_BUF(ql,buf,len,q);
3816 q = r;
3817 ql = rl;
3818 r = cur;
3819 rl = l;
3820
3821 count++;
3822 if (count > 50) {
3823 GROW;
3824 count = 0;
3825 }
3826 NEXTL(l);
3827 cur = CUR_CHAR(l);
3828 if (cur == 0) {
3829 SHRINK;
3830 GROW;
3831 cur = CUR_CHAR(l);
3832 }
3833 }
3834 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003835 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003836 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003837 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003838 xmlFree(buf);
3839 } else {
3840 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003841 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3842 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003843 }
3844 NEXT;
3845 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3846 (!ctxt->disableSAX))
3847 ctxt->sax->comment(ctxt->userData, buf);
3848 xmlFree(buf);
3849 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003850 return;
3851not_terminated:
3852 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3853 "Comment not terminated\n", NULL);
3854 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003855}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003856/**
3857 * xmlParseComment:
3858 * @ctxt: an XML parser context
3859 *
3860 * Skip an XML (SGML) comment <!-- .... -->
3861 * The spec says that "For compatibility, the string "--" (double-hyphen)
3862 * must not occur within comments. "
3863 *
3864 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3865 */
3866void
3867xmlParseComment(xmlParserCtxtPtr ctxt) {
3868 xmlChar *buf = NULL;
3869 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003870 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003871 xmlParserInputState state;
3872 const xmlChar *in;
3873 int nbchar = 0, ccol;
3874
3875 /*
3876 * Check that there is a comment right here.
3877 */
3878 if ((RAW != '<') || (NXT(1) != '!') ||
3879 (NXT(2) != '-') || (NXT(3) != '-')) return;
3880
3881 state = ctxt->instate;
3882 ctxt->instate = XML_PARSER_COMMENT;
3883 SKIP(4);
3884 SHRINK;
3885 GROW;
3886
3887 /*
3888 * Accelerated common case where input don't need to be
3889 * modified before passing it to the handler.
3890 */
3891 in = ctxt->input->cur;
3892 do {
3893 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003894 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003895 ctxt->input->line++; ctxt->input->col = 1;
3896 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003897 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003898 }
3899get_more:
3900 ccol = ctxt->input->col;
3901 while (((*in > '-') && (*in <= 0x7F)) ||
3902 ((*in >= 0x20) && (*in < '-')) ||
3903 (*in == 0x09)) {
3904 in++;
3905 ccol++;
3906 }
3907 ctxt->input->col = ccol;
3908 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003909 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003910 ctxt->input->line++; ctxt->input->col = 1;
3911 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003912 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003913 goto get_more;
3914 }
3915 nbchar = in - ctxt->input->cur;
3916 /*
3917 * save current set of data
3918 */
3919 if (nbchar > 0) {
3920 if ((ctxt->sax != NULL) &&
3921 (ctxt->sax->comment != NULL)) {
3922 if (buf == NULL) {
3923 if ((*in == '-') && (in[1] == '-'))
3924 size = nbchar + 1;
3925 else
3926 size = XML_PARSER_BUFFER_SIZE + nbchar;
3927 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3928 if (buf == NULL) {
3929 xmlErrMemory(ctxt, NULL);
3930 ctxt->instate = state;
3931 return;
3932 }
3933 len = 0;
3934 } else if (len + nbchar + 1 >= size) {
3935 xmlChar *new_buf;
3936 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3937 new_buf = (xmlChar *) xmlRealloc(buf,
3938 size * sizeof(xmlChar));
3939 if (new_buf == NULL) {
3940 xmlFree (buf);
3941 xmlErrMemory(ctxt, NULL);
3942 ctxt->instate = state;
3943 return;
3944 }
3945 buf = new_buf;
3946 }
3947 memcpy(&buf[len], ctxt->input->cur, nbchar);
3948 len += nbchar;
3949 buf[len] = 0;
3950 }
3951 }
3952 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003953 if (*in == 0xA) {
3954 in++;
3955 ctxt->input->line++; ctxt->input->col = 1;
3956 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003957 if (*in == 0xD) {
3958 in++;
3959 if (*in == 0xA) {
3960 ctxt->input->cur = in;
3961 in++;
3962 ctxt->input->line++; ctxt->input->col = 1;
3963 continue; /* while */
3964 }
3965 in--;
3966 }
3967 SHRINK;
3968 GROW;
3969 in = ctxt->input->cur;
3970 if (*in == '-') {
3971 if (in[1] == '-') {
3972 if (in[2] == '>') {
3973 SKIP(3);
3974 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3975 (!ctxt->disableSAX)) {
3976 if (buf != NULL)
3977 ctxt->sax->comment(ctxt->userData, buf);
3978 else
3979 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3980 }
3981 if (buf != NULL)
3982 xmlFree(buf);
3983 ctxt->instate = state;
3984 return;
3985 }
3986 if (buf != NULL)
3987 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3988 "Comment not terminated \n<!--%.50s\n",
3989 buf);
3990 else
3991 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3992 "Comment not terminated \n", NULL);
3993 in++;
3994 ctxt->input->col++;
3995 }
3996 in++;
3997 ctxt->input->col++;
3998 goto get_more;
3999 }
4000 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4001 xmlParseCommentComplex(ctxt, buf, len, size);
4002 ctxt->instate = state;
4003 return;
4004}
4005
Owen Taylor3473f882001-02-23 17:55:21 +00004006
4007/**
4008 * xmlParsePITarget:
4009 * @ctxt: an XML parser context
4010 *
4011 * parse the name of a PI
4012 *
4013 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4014 *
4015 * Returns the PITarget name or NULL
4016 */
4017
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004018const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004019xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004020 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004021
4022 name = xmlParseName(ctxt);
4023 if ((name != NULL) &&
4024 ((name[0] == 'x') || (name[0] == 'X')) &&
4025 ((name[1] == 'm') || (name[1] == 'M')) &&
4026 ((name[2] == 'l') || (name[2] == 'L'))) {
4027 int i;
4028 if ((name[0] == 'x') && (name[1] == 'm') &&
4029 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004030 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004031 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004032 return(name);
4033 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004034 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004035 return(name);
4036 }
4037 for (i = 0;;i++) {
4038 if (xmlW3CPIs[i] == NULL) break;
4039 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4040 return(name);
4041 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004042 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4043 "xmlParsePITarget: invalid name prefix 'xml'\n",
4044 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004045 }
4046 return(name);
4047}
4048
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004049#ifdef LIBXML_CATALOG_ENABLED
4050/**
4051 * xmlParseCatalogPI:
4052 * @ctxt: an XML parser context
4053 * @catalog: the PI value string
4054 *
4055 * parse an XML Catalog Processing Instruction.
4056 *
4057 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4058 *
4059 * Occurs only if allowed by the user and if happening in the Misc
4060 * part of the document before any doctype informations
4061 * This will add the given catalog to the parsing context in order
4062 * to be used if there is a resolution need further down in the document
4063 */
4064
4065static void
4066xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4067 xmlChar *URL = NULL;
4068 const xmlChar *tmp, *base;
4069 xmlChar marker;
4070
4071 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004072 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004073 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4074 goto error;
4075 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004076 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004077 if (*tmp != '=') {
4078 return;
4079 }
4080 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004081 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004082 marker = *tmp;
4083 if ((marker != '\'') && (marker != '"'))
4084 goto error;
4085 tmp++;
4086 base = tmp;
4087 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4088 if (*tmp == 0)
4089 goto error;
4090 URL = xmlStrndup(base, tmp - base);
4091 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004092 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004093 if (*tmp != 0)
4094 goto error;
4095
4096 if (URL != NULL) {
4097 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4098 xmlFree(URL);
4099 }
4100 return;
4101
4102error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004103 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4104 "Catalog PI syntax error: %s\n",
4105 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004106 if (URL != NULL)
4107 xmlFree(URL);
4108}
4109#endif
4110
Owen Taylor3473f882001-02-23 17:55:21 +00004111/**
4112 * xmlParsePI:
4113 * @ctxt: an XML parser context
4114 *
4115 * parse an XML Processing Instruction.
4116 *
4117 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4118 *
4119 * The processing is transfered to SAX once parsed.
4120 */
4121
4122void
4123xmlParsePI(xmlParserCtxtPtr ctxt) {
4124 xmlChar *buf = NULL;
4125 int len = 0;
4126 int size = XML_PARSER_BUFFER_SIZE;
4127 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004128 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004129 xmlParserInputState state;
4130 int count = 0;
4131
4132 if ((RAW == '<') && (NXT(1) == '?')) {
4133 xmlParserInputPtr input = ctxt->input;
4134 state = ctxt->instate;
4135 ctxt->instate = XML_PARSER_PI;
4136 /*
4137 * this is a Processing Instruction.
4138 */
4139 SKIP(2);
4140 SHRINK;
4141
4142 /*
4143 * Parse the target name and check for special support like
4144 * namespace.
4145 */
4146 target = xmlParsePITarget(ctxt);
4147 if (target != NULL) {
4148 if ((RAW == '?') && (NXT(1) == '>')) {
4149 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004150 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4151 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004152 }
4153 SKIP(2);
4154
4155 /*
4156 * SAX: PI detected.
4157 */
4158 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4159 (ctxt->sax->processingInstruction != NULL))
4160 ctxt->sax->processingInstruction(ctxt->userData,
4161 target, NULL);
4162 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004163 return;
4164 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004165 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004166 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004167 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004168 ctxt->instate = state;
4169 return;
4170 }
4171 cur = CUR;
4172 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004173 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4174 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004175 }
4176 SKIP_BLANKS;
4177 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004178 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004179 ((cur != '?') || (NXT(1) != '>'))) {
4180 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004181 xmlChar *tmp;
4182
Owen Taylor3473f882001-02-23 17:55:21 +00004183 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004184 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4185 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004187 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004188 ctxt->instate = state;
4189 return;
4190 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004191 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 count++;
4194 if (count > 50) {
4195 GROW;
4196 count = 0;
4197 }
4198 COPY_BUF(l,buf,len,cur);
4199 NEXTL(l);
4200 cur = CUR_CHAR(l);
4201 if (cur == 0) {
4202 SHRINK;
4203 GROW;
4204 cur = CUR_CHAR(l);
4205 }
4206 }
4207 buf[len] = 0;
4208 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004209 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4210 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004211 } else {
4212 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004213 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4214 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004215 }
4216 SKIP(2);
4217
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004218#ifdef LIBXML_CATALOG_ENABLED
4219 if (((state == XML_PARSER_MISC) ||
4220 (state == XML_PARSER_START)) &&
4221 (xmlStrEqual(target, XML_CATALOG_PI))) {
4222 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4223 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4224 (allow == XML_CATA_ALLOW_ALL))
4225 xmlParseCatalogPI(ctxt, buf);
4226 }
4227#endif
4228
4229
Owen Taylor3473f882001-02-23 17:55:21 +00004230 /*
4231 * SAX: PI detected.
4232 */
4233 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4234 (ctxt->sax->processingInstruction != NULL))
4235 ctxt->sax->processingInstruction(ctxt->userData,
4236 target, buf);
4237 }
4238 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004239 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004240 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004241 }
4242 ctxt->instate = state;
4243 }
4244}
4245
4246/**
4247 * xmlParseNotationDecl:
4248 * @ctxt: an XML parser context
4249 *
4250 * parse a notation declaration
4251 *
4252 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4253 *
4254 * Hence there is actually 3 choices:
4255 * 'PUBLIC' S PubidLiteral
4256 * 'PUBLIC' S PubidLiteral S SystemLiteral
4257 * and 'SYSTEM' S SystemLiteral
4258 *
4259 * See the NOTE on xmlParseExternalID().
4260 */
4261
4262void
4263xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004264 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004265 xmlChar *Pubid;
4266 xmlChar *Systemid;
4267
Daniel Veillarda07050d2003-10-19 14:46:32 +00004268 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004269 xmlParserInputPtr input = ctxt->input;
4270 SHRINK;
4271 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004272 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004273 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4274 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004275 return;
4276 }
4277 SKIP_BLANKS;
4278
Daniel Veillard76d66f42001-05-16 21:05:17 +00004279 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004280 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004281 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004282 return;
4283 }
William M. Brack76e95df2003-10-18 16:20:14 +00004284 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004286 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004287 return;
4288 }
4289 SKIP_BLANKS;
4290
4291 /*
4292 * Parse the IDs.
4293 */
4294 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4295 SKIP_BLANKS;
4296
4297 if (RAW == '>') {
4298 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4300 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004301 }
4302 NEXT;
4303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4304 (ctxt->sax->notationDecl != NULL))
4305 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4306 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004307 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004308 }
Owen Taylor3473f882001-02-23 17:55:21 +00004309 if (Systemid != NULL) xmlFree(Systemid);
4310 if (Pubid != NULL) xmlFree(Pubid);
4311 }
4312}
4313
4314/**
4315 * xmlParseEntityDecl:
4316 * @ctxt: an XML parser context
4317 *
4318 * parse <!ENTITY declarations
4319 *
4320 * [70] EntityDecl ::= GEDecl | PEDecl
4321 *
4322 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4323 *
4324 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4325 *
4326 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4327 *
4328 * [74] PEDef ::= EntityValue | ExternalID
4329 *
4330 * [76] NDataDecl ::= S 'NDATA' S Name
4331 *
4332 * [ VC: Notation Declared ]
4333 * The Name must match the declared name of a notation.
4334 */
4335
4336void
4337xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004338 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004339 xmlChar *value = NULL;
4340 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004341 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004342 int isParameter = 0;
4343 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004344 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004345
Daniel Veillard4c778d82005-01-23 17:37:44 +00004346 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004347 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004348 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004349 SHRINK;
4350 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004351 skipped = SKIP_BLANKS;
4352 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4354 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004355 }
Owen Taylor3473f882001-02-23 17:55:21 +00004356
4357 if (RAW == '%') {
4358 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004359 skipped = SKIP_BLANKS;
4360 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004361 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4362 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004363 }
Owen Taylor3473f882001-02-23 17:55:21 +00004364 isParameter = 1;
4365 }
4366
Daniel Veillard76d66f42001-05-16 21:05:17 +00004367 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004368 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004369 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4370 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004371 return;
4372 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004373 skipped = SKIP_BLANKS;
4374 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4376 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004377 }
Owen Taylor3473f882001-02-23 17:55:21 +00004378
Daniel Veillardf5582f12002-06-11 10:08:16 +00004379 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004380 /*
4381 * handle the various case of definitions...
4382 */
4383 if (isParameter) {
4384 if ((RAW == '"') || (RAW == '\'')) {
4385 value = xmlParseEntityValue(ctxt, &orig);
4386 if (value) {
4387 if ((ctxt->sax != NULL) &&
4388 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4389 ctxt->sax->entityDecl(ctxt->userData, name,
4390 XML_INTERNAL_PARAMETER_ENTITY,
4391 NULL, NULL, value);
4392 }
4393 } else {
4394 URI = xmlParseExternalID(ctxt, &literal, 1);
4395 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004396 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004397 }
4398 if (URI) {
4399 xmlURIPtr uri;
4400
4401 uri = xmlParseURI((const char *) URI);
4402 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004403 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4404 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004405 /*
4406 * This really ought to be a well formedness error
4407 * but the XML Core WG decided otherwise c.f. issue
4408 * E26 of the XML erratas.
4409 */
Owen Taylor3473f882001-02-23 17:55:21 +00004410 } else {
4411 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004412 /*
4413 * Okay this is foolish to block those but not
4414 * invalid URIs.
4415 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004416 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004417 } else {
4418 if ((ctxt->sax != NULL) &&
4419 (!ctxt->disableSAX) &&
4420 (ctxt->sax->entityDecl != NULL))
4421 ctxt->sax->entityDecl(ctxt->userData, name,
4422 XML_EXTERNAL_PARAMETER_ENTITY,
4423 literal, URI, NULL);
4424 }
4425 xmlFreeURI(uri);
4426 }
4427 }
4428 }
4429 } else {
4430 if ((RAW == '"') || (RAW == '\'')) {
4431 value = xmlParseEntityValue(ctxt, &orig);
4432 if ((ctxt->sax != NULL) &&
4433 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4434 ctxt->sax->entityDecl(ctxt->userData, name,
4435 XML_INTERNAL_GENERAL_ENTITY,
4436 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004437 /*
4438 * For expat compatibility in SAX mode.
4439 */
4440 if ((ctxt->myDoc == NULL) ||
4441 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4442 if (ctxt->myDoc == NULL) {
4443 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4444 }
4445 if (ctxt->myDoc->intSubset == NULL)
4446 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4447 BAD_CAST "fake", NULL, NULL);
4448
Daniel Veillard1af9a412003-08-20 22:54:39 +00004449 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4450 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004451 }
Owen Taylor3473f882001-02-23 17:55:21 +00004452 } else {
4453 URI = xmlParseExternalID(ctxt, &literal, 1);
4454 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004455 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004456 }
4457 if (URI) {
4458 xmlURIPtr uri;
4459
4460 uri = xmlParseURI((const char *)URI);
4461 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004462 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4463 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004464 /*
4465 * This really ought to be a well formedness error
4466 * but the XML Core WG decided otherwise c.f. issue
4467 * E26 of the XML erratas.
4468 */
Owen Taylor3473f882001-02-23 17:55:21 +00004469 } else {
4470 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004471 /*
4472 * Okay this is foolish to block those but not
4473 * invalid URIs.
4474 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004475 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004476 }
4477 xmlFreeURI(uri);
4478 }
4479 }
William M. Brack76e95df2003-10-18 16:20:14 +00004480 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004481 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4482 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004483 }
4484 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004485 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 }
4491 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004492 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004493 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4494 (ctxt->sax->unparsedEntityDecl != NULL))
4495 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4496 literal, URI, ndata);
4497 } else {
4498 if ((ctxt->sax != NULL) &&
4499 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4500 ctxt->sax->entityDecl(ctxt->userData, name,
4501 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4502 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004503 /*
4504 * For expat compatibility in SAX mode.
4505 * assuming the entity repalcement was asked for
4506 */
4507 if ((ctxt->replaceEntities != 0) &&
4508 ((ctxt->myDoc == NULL) ||
4509 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4510 if (ctxt->myDoc == NULL) {
4511 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4512 }
4513
4514 if (ctxt->myDoc->intSubset == NULL)
4515 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4516 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004517 xmlSAX2EntityDecl(ctxt, name,
4518 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4519 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004520 }
Owen Taylor3473f882001-02-23 17:55:21 +00004521 }
4522 }
4523 }
4524 SKIP_BLANKS;
4525 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004526 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004527 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004528 } else {
4529 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004530 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4531 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004532 }
4533 NEXT;
4534 }
4535 if (orig != NULL) {
4536 /*
4537 * Ugly mechanism to save the raw entity value.
4538 */
4539 xmlEntityPtr cur = NULL;
4540
4541 if (isParameter) {
4542 if ((ctxt->sax != NULL) &&
4543 (ctxt->sax->getParameterEntity != NULL))
4544 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4545 } else {
4546 if ((ctxt->sax != NULL) &&
4547 (ctxt->sax->getEntity != NULL))
4548 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004549 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004550 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004551 }
Owen Taylor3473f882001-02-23 17:55:21 +00004552 }
4553 if (cur != NULL) {
4554 if (cur->orig != NULL)
4555 xmlFree(orig);
4556 else
4557 cur->orig = orig;
4558 } else
4559 xmlFree(orig);
4560 }
Owen Taylor3473f882001-02-23 17:55:21 +00004561 if (value != NULL) xmlFree(value);
4562 if (URI != NULL) xmlFree(URI);
4563 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004564 }
4565}
4566
4567/**
4568 * xmlParseDefaultDecl:
4569 * @ctxt: an XML parser context
4570 * @value: Receive a possible fixed default value for the attribute
4571 *
4572 * Parse an attribute default declaration
4573 *
4574 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4575 *
4576 * [ VC: Required Attribute ]
4577 * if the default declaration is the keyword #REQUIRED, then the
4578 * attribute must be specified for all elements of the type in the
4579 * attribute-list declaration.
4580 *
4581 * [ VC: Attribute Default Legal ]
4582 * The declared default value must meet the lexical constraints of
4583 * the declared attribute type c.f. xmlValidateAttributeDecl()
4584 *
4585 * [ VC: Fixed Attribute Default ]
4586 * if an attribute has a default value declared with the #FIXED
4587 * keyword, instances of that attribute must match the default value.
4588 *
4589 * [ WFC: No < in Attribute Values ]
4590 * handled in xmlParseAttValue()
4591 *
4592 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4593 * or XML_ATTRIBUTE_FIXED.
4594 */
4595
4596int
4597xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4598 int val;
4599 xmlChar *ret;
4600
4601 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004602 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004603 SKIP(9);
4604 return(XML_ATTRIBUTE_REQUIRED);
4605 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004606 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004607 SKIP(8);
4608 return(XML_ATTRIBUTE_IMPLIED);
4609 }
4610 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004611 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004612 SKIP(6);
4613 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004614 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004615 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4616 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004617 }
4618 SKIP_BLANKS;
4619 }
4620 ret = xmlParseAttValue(ctxt);
4621 ctxt->instate = XML_PARSER_DTD;
4622 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004623 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004624 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004625 } else
4626 *value = ret;
4627 return(val);
4628}
4629
4630/**
4631 * xmlParseNotationType:
4632 * @ctxt: an XML parser context
4633 *
4634 * parse an Notation attribute type.
4635 *
4636 * Note: the leading 'NOTATION' S part has already being parsed...
4637 *
4638 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4639 *
4640 * [ VC: Notation Attributes ]
4641 * Values of this type must match one of the notation names included
4642 * in the declaration; all notation names in the declaration must be declared.
4643 *
4644 * Returns: the notation attribute tree built while parsing
4645 */
4646
4647xmlEnumerationPtr
4648xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004649 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004650 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4651
4652 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004653 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004654 return(NULL);
4655 }
4656 SHRINK;
4657 do {
4658 NEXT;
4659 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004660 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004661 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004662 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4663 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004664 return(ret);
4665 }
4666 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004667 if (cur == NULL) return(ret);
4668 if (last == NULL) ret = last = cur;
4669 else {
4670 last->next = cur;
4671 last = cur;
4672 }
4673 SKIP_BLANKS;
4674 } while (RAW == '|');
4675 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004676 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004677 if ((last != NULL) && (last != ret))
4678 xmlFreeEnumeration(last);
4679 return(ret);
4680 }
4681 NEXT;
4682 return(ret);
4683}
4684
4685/**
4686 * xmlParseEnumerationType:
4687 * @ctxt: an XML parser context
4688 *
4689 * parse an Enumeration attribute type.
4690 *
4691 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4692 *
4693 * [ VC: Enumeration ]
4694 * Values of this type must match one of the Nmtoken tokens in
4695 * the declaration
4696 *
4697 * Returns: the enumeration attribute tree built while parsing
4698 */
4699
4700xmlEnumerationPtr
4701xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4702 xmlChar *name;
4703 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4704
4705 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004706 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 return(NULL);
4708 }
4709 SHRINK;
4710 do {
4711 NEXT;
4712 SKIP_BLANKS;
4713 name = xmlParseNmtoken(ctxt);
4714 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004715 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004716 return(ret);
4717 }
4718 cur = xmlCreateEnumeration(name);
4719 xmlFree(name);
4720 if (cur == NULL) return(ret);
4721 if (last == NULL) ret = last = cur;
4722 else {
4723 last->next = cur;
4724 last = cur;
4725 }
4726 SKIP_BLANKS;
4727 } while (RAW == '|');
4728 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004729 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004730 return(ret);
4731 }
4732 NEXT;
4733 return(ret);
4734}
4735
4736/**
4737 * xmlParseEnumeratedType:
4738 * @ctxt: an XML parser context
4739 * @tree: the enumeration tree built while parsing
4740 *
4741 * parse an Enumerated attribute type.
4742 *
4743 * [57] EnumeratedType ::= NotationType | Enumeration
4744 *
4745 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4746 *
4747 *
4748 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4749 */
4750
4751int
4752xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004753 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004754 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004755 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4757 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004758 return(0);
4759 }
4760 SKIP_BLANKS;
4761 *tree = xmlParseNotationType(ctxt);
4762 if (*tree == NULL) return(0);
4763 return(XML_ATTRIBUTE_NOTATION);
4764 }
4765 *tree = xmlParseEnumerationType(ctxt);
4766 if (*tree == NULL) return(0);
4767 return(XML_ATTRIBUTE_ENUMERATION);
4768}
4769
4770/**
4771 * xmlParseAttributeType:
4772 * @ctxt: an XML parser context
4773 * @tree: the enumeration tree built while parsing
4774 *
4775 * parse the Attribute list def for an element
4776 *
4777 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4778 *
4779 * [55] StringType ::= 'CDATA'
4780 *
4781 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4782 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4783 *
4784 * Validity constraints for attribute values syntax are checked in
4785 * xmlValidateAttributeValue()
4786 *
4787 * [ VC: ID ]
4788 * Values of type ID must match the Name production. A name must not
4789 * appear more than once in an XML document as a value of this type;
4790 * i.e., ID values must uniquely identify the elements which bear them.
4791 *
4792 * [ VC: One ID per Element Type ]
4793 * No element type may have more than one ID attribute specified.
4794 *
4795 * [ VC: ID Attribute Default ]
4796 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4797 *
4798 * [ VC: IDREF ]
4799 * Values of type IDREF must match the Name production, and values
4800 * of type IDREFS must match Names; each IDREF Name must match the value
4801 * of an ID attribute on some element in the XML document; i.e. IDREF
4802 * values must match the value of some ID attribute.
4803 *
4804 * [ VC: Entity Name ]
4805 * Values of type ENTITY must match the Name production, values
4806 * of type ENTITIES must match Names; each Entity Name must match the
4807 * name of an unparsed entity declared in the DTD.
4808 *
4809 * [ VC: Name Token ]
4810 * Values of type NMTOKEN must match the Nmtoken production; values
4811 * of type NMTOKENS must match Nmtokens.
4812 *
4813 * Returns the attribute type
4814 */
4815int
4816xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4817 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004818 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004819 SKIP(5);
4820 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004821 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004822 SKIP(6);
4823 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004824 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004825 SKIP(5);
4826 return(XML_ATTRIBUTE_IDREF);
4827 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4828 SKIP(2);
4829 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004830 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004831 SKIP(6);
4832 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004833 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004834 SKIP(8);
4835 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004836 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004837 SKIP(8);
4838 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004839 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004840 SKIP(7);
4841 return(XML_ATTRIBUTE_NMTOKEN);
4842 }
4843 return(xmlParseEnumeratedType(ctxt, tree));
4844}
4845
4846/**
4847 * xmlParseAttributeListDecl:
4848 * @ctxt: an XML parser context
4849 *
4850 * : parse the Attribute list def for an element
4851 *
4852 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4853 *
4854 * [53] AttDef ::= S Name S AttType S DefaultDecl
4855 *
4856 */
4857void
4858xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004859 const xmlChar *elemName;
4860 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004861 xmlEnumerationPtr tree;
4862
Daniel Veillarda07050d2003-10-19 14:46:32 +00004863 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004864 xmlParserInputPtr input = ctxt->input;
4865
4866 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004867 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004869 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004870 }
4871 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004872 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004873 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004874 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4875 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004876 return;
4877 }
4878 SKIP_BLANKS;
4879 GROW;
4880 while (RAW != '>') {
4881 const xmlChar *check = CUR_PTR;
4882 int type;
4883 int def;
4884 xmlChar *defaultValue = NULL;
4885
4886 GROW;
4887 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004888 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004889 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004890 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4891 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004892 break;
4893 }
4894 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004895 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004897 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004898 break;
4899 }
4900 SKIP_BLANKS;
4901
4902 type = xmlParseAttributeType(ctxt, &tree);
4903 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004904 break;
4905 }
4906
4907 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004908 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4910 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004911 if (tree != NULL)
4912 xmlFreeEnumeration(tree);
4913 break;
4914 }
4915 SKIP_BLANKS;
4916
4917 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4918 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004919 if (defaultValue != NULL)
4920 xmlFree(defaultValue);
4921 if (tree != NULL)
4922 xmlFreeEnumeration(tree);
4923 break;
4924 }
4925
4926 GROW;
4927 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004928 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004930 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004931 if (defaultValue != NULL)
4932 xmlFree(defaultValue);
4933 if (tree != NULL)
4934 xmlFreeEnumeration(tree);
4935 break;
4936 }
4937 SKIP_BLANKS;
4938 }
4939 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004940 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4941 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004942 if (defaultValue != NULL)
4943 xmlFree(defaultValue);
4944 if (tree != NULL)
4945 xmlFreeEnumeration(tree);
4946 break;
4947 }
4948 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4949 (ctxt->sax->attributeDecl != NULL))
4950 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4951 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004952 else if (tree != NULL)
4953 xmlFreeEnumeration(tree);
4954
4955 if ((ctxt->sax2) && (defaultValue != NULL) &&
4956 (def != XML_ATTRIBUTE_IMPLIED) &&
4957 (def != XML_ATTRIBUTE_REQUIRED)) {
4958 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4959 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004960 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4961 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4962 }
Owen Taylor3473f882001-02-23 17:55:21 +00004963 if (defaultValue != NULL)
4964 xmlFree(defaultValue);
4965 GROW;
4966 }
4967 if (RAW == '>') {
4968 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004969 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4970 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004971 }
4972 NEXT;
4973 }
Owen Taylor3473f882001-02-23 17:55:21 +00004974 }
4975}
4976
4977/**
4978 * xmlParseElementMixedContentDecl:
4979 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004980 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004981 *
4982 * parse the declaration for a Mixed Element content
4983 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4984 *
4985 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4986 * '(' S? '#PCDATA' S? ')'
4987 *
4988 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4989 *
4990 * [ VC: No Duplicate Types ]
4991 * The same name must not appear more than once in a single
4992 * mixed-content declaration.
4993 *
4994 * returns: the list of the xmlElementContentPtr describing the element choices
4995 */
4996xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004997xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004998 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004999 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005000
5001 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005002 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005003 SKIP(7);
5004 SKIP_BLANKS;
5005 SHRINK;
5006 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005007 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005008 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5009"Element content declaration doesn't start and stop in the same entity\n",
5010 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005011 }
Owen Taylor3473f882001-02-23 17:55:21 +00005012 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005013 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005014 if (RAW == '*') {
5015 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5016 NEXT;
5017 }
5018 return(ret);
5019 }
5020 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005021 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005022 if (ret == NULL) return(NULL);
5023 }
5024 while (RAW == '|') {
5025 NEXT;
5026 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005027 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005028 if (ret == NULL) return(NULL);
5029 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005030 if (cur != NULL)
5031 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 cur = ret;
5033 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005034 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005035 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005036 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005037 if (n->c1 != NULL)
5038 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005039 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005040 if (n != NULL)
5041 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005042 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005043 }
5044 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005045 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005046 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005047 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005048 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005049 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 return(NULL);
5051 }
5052 SKIP_BLANKS;
5053 GROW;
5054 }
5055 if ((RAW == ')') && (NXT(1) == '*')) {
5056 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005057 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005058 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005059 if (cur->c2 != NULL)
5060 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005061 }
5062 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005063 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005064 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5065"Element content declaration doesn't start and stop in the same entity\n",
5066 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005067 }
Owen Taylor3473f882001-02-23 17:55:21 +00005068 SKIP(2);
5069 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005070 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005071 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005072 return(NULL);
5073 }
5074
5075 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005076 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005077 }
5078 return(ret);
5079}
5080
5081/**
5082 * xmlParseElementChildrenContentDecl:
5083 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005084 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005085 *
5086 * parse the declaration for a Mixed Element content
5087 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5088 *
5089 *
5090 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5091 *
5092 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5093 *
5094 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5095 *
5096 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5097 *
5098 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5099 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005100 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005101 * opening or closing parentheses in a choice, seq, or Mixed
5102 * construct is contained in the replacement text for a parameter
5103 * entity, both must be contained in the same replacement text. For
5104 * interoperability, if a parameter-entity reference appears in a
5105 * choice, seq, or Mixed construct, its replacement text should not
5106 * be empty, and neither the first nor last non-blank character of
5107 * the replacement text should be a connector (| or ,).
5108 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005109 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005110 * hierarchy.
5111 */
5112xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005113xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005114 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005115 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005116 xmlChar type = 0;
5117
5118 SKIP_BLANKS;
5119 GROW;
5120 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005121 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005122
Owen Taylor3473f882001-02-23 17:55:21 +00005123 /* Recurse on first child */
5124 NEXT;
5125 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005126 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 SKIP_BLANKS;
5128 GROW;
5129 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005130 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005131 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 return(NULL);
5134 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005135 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005136 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005137 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005138 return(NULL);
5139 }
Owen Taylor3473f882001-02-23 17:55:21 +00005140 GROW;
5141 if (RAW == '?') {
5142 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5143 NEXT;
5144 } else if (RAW == '*') {
5145 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5146 NEXT;
5147 } else if (RAW == '+') {
5148 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5149 NEXT;
5150 } else {
5151 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5152 }
Owen Taylor3473f882001-02-23 17:55:21 +00005153 GROW;
5154 }
5155 SKIP_BLANKS;
5156 SHRINK;
5157 while (RAW != ')') {
5158 /*
5159 * Each loop we parse one separator and one element.
5160 */
5161 if (RAW == ',') {
5162 if (type == 0) type = CUR;
5163
5164 /*
5165 * Detect "Name | Name , Name" error
5166 */
5167 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005168 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005169 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005170 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005171 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005172 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005173 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005174 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005175 return(NULL);
5176 }
5177 NEXT;
5178
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005179 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005180 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005181 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005182 xmlFreeDocElementContent(ctxt->myDoc, last);
5183 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005184 return(NULL);
5185 }
5186 if (last == NULL) {
5187 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005188 if (ret != NULL)
5189 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005190 ret = cur = op;
5191 } else {
5192 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005193 if (op != NULL)
5194 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005195 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005196 if (last != NULL)
5197 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005198 cur =op;
5199 last = NULL;
5200 }
5201 } else if (RAW == '|') {
5202 if (type == 0) type = CUR;
5203
5204 /*
5205 * Detect "Name , Name | Name" error
5206 */
5207 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005208 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005209 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005210 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005211 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005212 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005213 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005214 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005215 return(NULL);
5216 }
5217 NEXT;
5218
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005219 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005220 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005221 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005222 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005224 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005225 return(NULL);
5226 }
5227 if (last == NULL) {
5228 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005229 if (ret != NULL)
5230 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 ret = cur = op;
5232 } else {
5233 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005234 if (op != NULL)
5235 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005236 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005237 if (last != NULL)
5238 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 cur =op;
5240 last = NULL;
5241 }
5242 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005243 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005245 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005246 return(NULL);
5247 }
5248 GROW;
5249 SKIP_BLANKS;
5250 GROW;
5251 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005252 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005253 /* Recurse on second child */
5254 NEXT;
5255 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005256 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005257 SKIP_BLANKS;
5258 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005259 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005260 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005261 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005262 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005263 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005264 return(NULL);
5265 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005266 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005267 if (RAW == '?') {
5268 last->ocur = XML_ELEMENT_CONTENT_OPT;
5269 NEXT;
5270 } else if (RAW == '*') {
5271 last->ocur = XML_ELEMENT_CONTENT_MULT;
5272 NEXT;
5273 } else if (RAW == '+') {
5274 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5275 NEXT;
5276 } else {
5277 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5278 }
5279 }
5280 SKIP_BLANKS;
5281 GROW;
5282 }
5283 if ((cur != NULL) && (last != NULL)) {
5284 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005285 if (last != NULL)
5286 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005287 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005288 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005289 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5290"Element content declaration doesn't start and stop in the same entity\n",
5291 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005292 }
Owen Taylor3473f882001-02-23 17:55:21 +00005293 NEXT;
5294 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005295 if (ret != NULL) {
5296 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5297 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5298 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5299 else
5300 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5301 }
Owen Taylor3473f882001-02-23 17:55:21 +00005302 NEXT;
5303 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005304 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005305 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005306 cur = ret;
5307 /*
5308 * Some normalization:
5309 * (a | b* | c?)* == (a | b | c)*
5310 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005311 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005312 if ((cur->c1 != NULL) &&
5313 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5314 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5315 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5316 if ((cur->c2 != NULL) &&
5317 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5318 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5319 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5320 cur = cur->c2;
5321 }
5322 }
Owen Taylor3473f882001-02-23 17:55:21 +00005323 NEXT;
5324 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005325 if (ret != NULL) {
5326 int found = 0;
5327
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005328 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5329 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5330 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005331 else
5332 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005333 /*
5334 * Some normalization:
5335 * (a | b*)+ == (a | b)*
5336 * (a | b?)+ == (a | b)*
5337 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005338 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005339 if ((cur->c1 != NULL) &&
5340 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5341 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5342 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5343 found = 1;
5344 }
5345 if ((cur->c2 != NULL) &&
5346 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5347 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5348 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5349 found = 1;
5350 }
5351 cur = cur->c2;
5352 }
5353 if (found)
5354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5355 }
Owen Taylor3473f882001-02-23 17:55:21 +00005356 NEXT;
5357 }
5358 return(ret);
5359}
5360
5361/**
5362 * xmlParseElementContentDecl:
5363 * @ctxt: an XML parser context
5364 * @name: the name of the element being defined.
5365 * @result: the Element Content pointer will be stored here if any
5366 *
5367 * parse the declaration for an Element content either Mixed or Children,
5368 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5369 *
5370 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5371 *
5372 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5373 */
5374
5375int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005376xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005377 xmlElementContentPtr *result) {
5378
5379 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005380 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005381 int res;
5382
5383 *result = NULL;
5384
5385 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005386 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005387 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005388 return(-1);
5389 }
5390 NEXT;
5391 GROW;
5392 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005393 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005394 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 res = XML_ELEMENT_TYPE_MIXED;
5396 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005397 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 res = XML_ELEMENT_TYPE_ELEMENT;
5399 }
Owen Taylor3473f882001-02-23 17:55:21 +00005400 SKIP_BLANKS;
5401 *result = tree;
5402 return(res);
5403}
5404
5405/**
5406 * xmlParseElementDecl:
5407 * @ctxt: an XML parser context
5408 *
5409 * parse an Element declaration.
5410 *
5411 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5412 *
5413 * [ VC: Unique Element Type Declaration ]
5414 * No element type may be declared more than once
5415 *
5416 * Returns the type of the element, or -1 in case of error
5417 */
5418int
5419xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005420 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005421 int ret = -1;
5422 xmlElementContentPtr content = NULL;
5423
Daniel Veillard4c778d82005-01-23 17:37:44 +00005424 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005425 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005426 xmlParserInputPtr input = ctxt->input;
5427
5428 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005429 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005432 }
5433 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005434 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005435 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005436 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5437 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005438 return(-1);
5439 }
5440 while ((RAW == 0) && (ctxt->inputNr > 1))
5441 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005442 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5444 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005445 }
5446 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005447 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005448 SKIP(5);
5449 /*
5450 * Element must always be empty.
5451 */
5452 ret = XML_ELEMENT_TYPE_EMPTY;
5453 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5454 (NXT(2) == 'Y')) {
5455 SKIP(3);
5456 /*
5457 * Element is a generic container.
5458 */
5459 ret = XML_ELEMENT_TYPE_ANY;
5460 } else if (RAW == '(') {
5461 ret = xmlParseElementContentDecl(ctxt, name, &content);
5462 } else {
5463 /*
5464 * [ WFC: PEs in Internal Subset ] error handling.
5465 */
5466 if ((RAW == '%') && (ctxt->external == 0) &&
5467 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005468 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005469 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005470 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005471 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005472 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5473 }
Owen Taylor3473f882001-02-23 17:55:21 +00005474 return(-1);
5475 }
5476
5477 SKIP_BLANKS;
5478 /*
5479 * Pop-up of finished entities.
5480 */
5481 while ((RAW == 0) && (ctxt->inputNr > 1))
5482 xmlPopInput(ctxt);
5483 SKIP_BLANKS;
5484
5485 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005486 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005487 if (content != NULL) {
5488 xmlFreeDocElementContent(ctxt->myDoc, content);
5489 }
Owen Taylor3473f882001-02-23 17:55:21 +00005490 } else {
5491 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005492 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5493 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005494 }
5495
5496 NEXT;
5497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005498 (ctxt->sax->elementDecl != NULL)) {
5499 if (content != NULL)
5500 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005501 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5502 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005503 if ((content != NULL) && (content->parent == NULL)) {
5504 /*
5505 * this is a trick: if xmlAddElementDecl is called,
5506 * instead of copying the full tree it is plugged directly
5507 * if called from the parser. Avoid duplicating the
5508 * interfaces or change the API/ABI
5509 */
5510 xmlFreeDocElementContent(ctxt->myDoc, content);
5511 }
5512 } else if (content != NULL) {
5513 xmlFreeDocElementContent(ctxt->myDoc, content);
5514 }
Owen Taylor3473f882001-02-23 17:55:21 +00005515 }
Owen Taylor3473f882001-02-23 17:55:21 +00005516 }
5517 return(ret);
5518}
5519
5520/**
Owen Taylor3473f882001-02-23 17:55:21 +00005521 * xmlParseConditionalSections
5522 * @ctxt: an XML parser context
5523 *
5524 * [61] conditionalSect ::= includeSect | ignoreSect
5525 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5526 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5527 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5528 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5529 */
5530
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005531static void
Owen Taylor3473f882001-02-23 17:55:21 +00005532xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5533 SKIP(3);
5534 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005535 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005536 SKIP(7);
5537 SKIP_BLANKS;
5538 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005539 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005540 } else {
5541 NEXT;
5542 }
5543 if (xmlParserDebugEntities) {
5544 if ((ctxt->input != NULL) && (ctxt->input->filename))
5545 xmlGenericError(xmlGenericErrorContext,
5546 "%s(%d): ", ctxt->input->filename,
5547 ctxt->input->line);
5548 xmlGenericError(xmlGenericErrorContext,
5549 "Entering INCLUDE Conditional Section\n");
5550 }
5551
5552 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5553 (NXT(2) != '>'))) {
5554 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005555 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005556
5557 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5558 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005559 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005560 NEXT;
5561 } else if (RAW == '%') {
5562 xmlParsePEReference(ctxt);
5563 } else
5564 xmlParseMarkupDecl(ctxt);
5565
5566 /*
5567 * Pop-up of finished entities.
5568 */
5569 while ((RAW == 0) && (ctxt->inputNr > 1))
5570 xmlPopInput(ctxt);
5571
Daniel Veillardfdc91562002-07-01 21:52:03 +00005572 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005573 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005574 break;
5575 }
5576 }
5577 if (xmlParserDebugEntities) {
5578 if ((ctxt->input != NULL) && (ctxt->input->filename))
5579 xmlGenericError(xmlGenericErrorContext,
5580 "%s(%d): ", ctxt->input->filename,
5581 ctxt->input->line);
5582 xmlGenericError(xmlGenericErrorContext,
5583 "Leaving INCLUDE Conditional Section\n");
5584 }
5585
Daniel Veillarda07050d2003-10-19 14:46:32 +00005586 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005587 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005588 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005589 int depth = 0;
5590
5591 SKIP(6);
5592 SKIP_BLANKS;
5593 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005594 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005595 } else {
5596 NEXT;
5597 }
5598 if (xmlParserDebugEntities) {
5599 if ((ctxt->input != NULL) && (ctxt->input->filename))
5600 xmlGenericError(xmlGenericErrorContext,
5601 "%s(%d): ", ctxt->input->filename,
5602 ctxt->input->line);
5603 xmlGenericError(xmlGenericErrorContext,
5604 "Entering IGNORE Conditional Section\n");
5605 }
5606
5607 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005608 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005609 * But disable SAX event generating DTD building in the meantime
5610 */
5611 state = ctxt->disableSAX;
5612 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005613 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005614 ctxt->instate = XML_PARSER_IGNORE;
5615
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005616 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005617 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5618 depth++;
5619 SKIP(3);
5620 continue;
5621 }
5622 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5623 if (--depth >= 0) SKIP(3);
5624 continue;
5625 }
5626 NEXT;
5627 continue;
5628 }
5629
5630 ctxt->disableSAX = state;
5631 ctxt->instate = instate;
5632
5633 if (xmlParserDebugEntities) {
5634 if ((ctxt->input != NULL) && (ctxt->input->filename))
5635 xmlGenericError(xmlGenericErrorContext,
5636 "%s(%d): ", ctxt->input->filename,
5637 ctxt->input->line);
5638 xmlGenericError(xmlGenericErrorContext,
5639 "Leaving IGNORE Conditional Section\n");
5640 }
5641
5642 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005643 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005644 }
5645
5646 if (RAW == 0)
5647 SHRINK;
5648
5649 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005650 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005651 } else {
5652 SKIP(3);
5653 }
5654}
5655
5656/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005657 * xmlParseMarkupDecl:
5658 * @ctxt: an XML parser context
5659 *
5660 * parse Markup declarations
5661 *
5662 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5663 * NotationDecl | PI | Comment
5664 *
5665 * [ VC: Proper Declaration/PE Nesting ]
5666 * Parameter-entity replacement text must be properly nested with
5667 * markup declarations. That is to say, if either the first character
5668 * or the last character of a markup declaration (markupdecl above) is
5669 * contained in the replacement text for a parameter-entity reference,
5670 * both must be contained in the same replacement text.
5671 *
5672 * [ WFC: PEs in Internal Subset ]
5673 * In the internal DTD subset, parameter-entity references can occur
5674 * only where markup declarations can occur, not within markup declarations.
5675 * (This does not apply to references that occur in external parameter
5676 * entities or to the external subset.)
5677 */
5678void
5679xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5680 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005681 if (CUR == '<') {
5682 if (NXT(1) == '!') {
5683 switch (NXT(2)) {
5684 case 'E':
5685 if (NXT(3) == 'L')
5686 xmlParseElementDecl(ctxt);
5687 else if (NXT(3) == 'N')
5688 xmlParseEntityDecl(ctxt);
5689 break;
5690 case 'A':
5691 xmlParseAttributeListDecl(ctxt);
5692 break;
5693 case 'N':
5694 xmlParseNotationDecl(ctxt);
5695 break;
5696 case '-':
5697 xmlParseComment(ctxt);
5698 break;
5699 default:
5700 /* there is an error but it will be detected later */
5701 break;
5702 }
5703 } else if (NXT(1) == '?') {
5704 xmlParsePI(ctxt);
5705 }
5706 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005707 /*
5708 * This is only for internal subset. On external entities,
5709 * the replacement is done before parsing stage
5710 */
5711 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5712 xmlParsePEReference(ctxt);
5713
5714 /*
5715 * Conditional sections are allowed from entities included
5716 * by PE References in the internal subset.
5717 */
5718 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5719 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5720 xmlParseConditionalSections(ctxt);
5721 }
5722 }
5723
5724 ctxt->instate = XML_PARSER_DTD;
5725}
5726
5727/**
5728 * xmlParseTextDecl:
5729 * @ctxt: an XML parser context
5730 *
5731 * parse an XML declaration header for external entities
5732 *
5733 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5734 *
5735 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5736 */
5737
5738void
5739xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5740 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005741 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005742
5743 /*
5744 * We know that '<?xml' is here.
5745 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005746 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005747 SKIP(5);
5748 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005749 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005750 return;
5751 }
5752
William M. Brack76e95df2003-10-18 16:20:14 +00005753 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005754 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5755 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005756 }
5757 SKIP_BLANKS;
5758
5759 /*
5760 * We may have the VersionInfo here.
5761 */
5762 version = xmlParseVersionInfo(ctxt);
5763 if (version == NULL)
5764 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005765 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005766 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005767 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5768 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005769 }
5770 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005771 ctxt->input->version = version;
5772
5773 /*
5774 * We must have the encoding declaration
5775 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005776 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005777 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5778 /*
5779 * The XML REC instructs us to stop parsing right here
5780 */
5781 return;
5782 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005783 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5784 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5785 "Missing encoding in text declaration\n");
5786 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005787
5788 SKIP_BLANKS;
5789 if ((RAW == '?') && (NXT(1) == '>')) {
5790 SKIP(2);
5791 } else if (RAW == '>') {
5792 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005793 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005794 NEXT;
5795 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005796 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005797 MOVETO_ENDTAG(CUR_PTR);
5798 NEXT;
5799 }
5800}
5801
5802/**
Owen Taylor3473f882001-02-23 17:55:21 +00005803 * xmlParseExternalSubset:
5804 * @ctxt: an XML parser context
5805 * @ExternalID: the external identifier
5806 * @SystemID: the system identifier (or URL)
5807 *
5808 * parse Markup declarations from an external subset
5809 *
5810 * [30] extSubset ::= textDecl? extSubsetDecl
5811 *
5812 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5813 */
5814void
5815xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5816 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005817 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005818 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005819 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005820 xmlParseTextDecl(ctxt);
5821 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5822 /*
5823 * The XML REC instructs us to stop parsing right here
5824 */
5825 ctxt->instate = XML_PARSER_EOF;
5826 return;
5827 }
5828 }
5829 if (ctxt->myDoc == NULL) {
5830 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5831 }
5832 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5833 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5834
5835 ctxt->instate = XML_PARSER_DTD;
5836 ctxt->external = 1;
5837 while (((RAW == '<') && (NXT(1) == '?')) ||
5838 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005839 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005840 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005841 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005842
5843 GROW;
5844 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5845 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005846 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005847 NEXT;
5848 } else if (RAW == '%') {
5849 xmlParsePEReference(ctxt);
5850 } else
5851 xmlParseMarkupDecl(ctxt);
5852
5853 /*
5854 * Pop-up of finished entities.
5855 */
5856 while ((RAW == 0) && (ctxt->inputNr > 1))
5857 xmlPopInput(ctxt);
5858
Daniel Veillardfdc91562002-07-01 21:52:03 +00005859 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005860 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005861 break;
5862 }
5863 }
5864
5865 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005866 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005867 }
5868
5869}
5870
5871/**
5872 * xmlParseReference:
5873 * @ctxt: an XML parser context
5874 *
5875 * parse and handle entity references in content, depending on the SAX
5876 * interface, this may end-up in a call to character() if this is a
5877 * CharRef, a predefined entity, if there is no reference() callback.
5878 * or if the parser was asked to switch to that mode.
5879 *
5880 * [67] Reference ::= EntityRef | CharRef
5881 */
5882void
5883xmlParseReference(xmlParserCtxtPtr ctxt) {
5884 xmlEntityPtr ent;
5885 xmlChar *val;
5886 if (RAW != '&') return;
5887
5888 if (NXT(1) == '#') {
5889 int i = 0;
5890 xmlChar out[10];
5891 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005892 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005893
5894 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5895 /*
5896 * So we are using non-UTF-8 buffers
5897 * Check that the char fit on 8bits, if not
5898 * generate a CharRef.
5899 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005900 if (value <= 0xFF) {
5901 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 out[1] = 0;
5903 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5904 (!ctxt->disableSAX))
5905 ctxt->sax->characters(ctxt->userData, out, 1);
5906 } else {
5907 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005908 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005909 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005910 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5912 (!ctxt->disableSAX))
5913 ctxt->sax->reference(ctxt->userData, out);
5914 }
5915 } else {
5916 /*
5917 * Just encode the value in UTF-8
5918 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005919 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005920 out[i] = 0;
5921 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5922 (!ctxt->disableSAX))
5923 ctxt->sax->characters(ctxt->userData, out, i);
5924 }
5925 } else {
5926 ent = xmlParseEntityRef(ctxt);
5927 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005928 if (!ctxt->wellFormed)
5929 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005930 if ((ent->name != NULL) &&
5931 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5932 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005933 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005934
5935
5936 /*
5937 * The first reference to the entity trigger a parsing phase
5938 * where the ent->children is filled with the result from
5939 * the parsing.
5940 */
5941 if (ent->children == NULL) {
5942 xmlChar *value;
5943 value = ent->content;
5944
5945 /*
5946 * Check that this entity is well formed
5947 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005948 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005949 (value[1] == 0) && (value[0] == '<') &&
5950 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5951 /*
5952 * DONE: get definite answer on this !!!
5953 * Lots of entity decls are used to declare a single
5954 * char
5955 * <!ENTITY lt "<">
5956 * Which seems to be valid since
5957 * 2.4: The ampersand character (&) and the left angle
5958 * bracket (<) may appear in their literal form only
5959 * when used ... They are also legal within the literal
5960 * entity value of an internal entity declaration;i
5961 * see "4.3.2 Well-Formed Parsed Entities".
5962 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5963 * Looking at the OASIS test suite and James Clark
5964 * tests, this is broken. However the XML REC uses
5965 * it. Is the XML REC not well-formed ????
5966 * This is a hack to avoid this problem
5967 *
5968 * ANSWER: since lt gt amp .. are already defined,
5969 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005970 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005971 * is lousy but acceptable.
5972 */
5973 list = xmlNewDocText(ctxt->myDoc, value);
5974 if (list != NULL) {
5975 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5976 (ent->children == NULL)) {
5977 ent->children = list;
5978 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005979 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005980 list->parent = (xmlNodePtr) ent;
5981 } else {
5982 xmlFreeNodeList(list);
5983 }
5984 } else if (list != NULL) {
5985 xmlFreeNodeList(list);
5986 }
5987 } else {
5988 /*
5989 * 4.3.2: An internal general parsed entity is well-formed
5990 * if its replacement text matches the production labeled
5991 * content.
5992 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005993
5994 void *user_data;
5995 /*
5996 * This is a bit hackish but this seems the best
5997 * way to make sure both SAX and DOM entity support
5998 * behaves okay.
5999 */
6000 if (ctxt->userData == ctxt)
6001 user_data = NULL;
6002 else
6003 user_data = ctxt->userData;
6004
Owen Taylor3473f882001-02-23 17:55:21 +00006005 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6006 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006007 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6008 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006009 ctxt->depth--;
6010 } else if (ent->etype ==
6011 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6012 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006013 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006014 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006015 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006016 ctxt->depth--;
6017 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006018 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006019 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6020 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006021 }
6022 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006023 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006024 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006025 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006026 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6027 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006028 (ent->children == NULL)) {
6029 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006030 if (ctxt->replaceEntities) {
6031 /*
6032 * Prune it directly in the generated document
6033 * except for single text nodes.
6034 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006035 if (((list->type == XML_TEXT_NODE) &&
6036 (list->next == NULL)) ||
6037 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006038 list->parent = (xmlNodePtr) ent;
6039 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006040 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006041 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006042 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006043 while (list != NULL) {
6044 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006045 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006046 if (list->next == NULL)
6047 ent->last = list;
6048 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006049 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006050 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006051#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006052 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6053 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006054#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006055 }
6056 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006057 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006058 while (list != NULL) {
6059 list->parent = (xmlNodePtr) ent;
6060 if (list->next == NULL)
6061 ent->last = list;
6062 list = list->next;
6063 }
Owen Taylor3473f882001-02-23 17:55:21 +00006064 }
6065 } else {
6066 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006067 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006068 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006069 } else if ((ret != XML_ERR_OK) &&
6070 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006071 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006072 } else if (list != NULL) {
6073 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006074 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006075 }
6076 }
6077 }
6078 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6079 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6080 /*
6081 * Create a node.
6082 */
6083 ctxt->sax->reference(ctxt->userData, ent->name);
6084 return;
6085 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006086 /*
6087 * There is a problem on the handling of _private for entities
6088 * (bug 155816): Should we copy the content of the field from
6089 * the entity (possibly overwriting some value set by the user
6090 * when a copy is created), should we leave it alone, or should
6091 * we try to take care of different situations? The problem
6092 * is exacerbated by the usage of this field by the xmlReader.
6093 * To fix this bug, we look at _private on the created node
6094 * and, if it's NULL, we copy in whatever was in the entity.
6095 * If it's not NULL we leave it alone. This is somewhat of a
6096 * hack - maybe we should have further tests to determine
6097 * what to do.
6098 */
Owen Taylor3473f882001-02-23 17:55:21 +00006099 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6100 /*
6101 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006102 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006103 * In the first occurrence list contains the replacement.
6104 * progressive == 2 means we are operating on the Reader
6105 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006106 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006107 if (((list == NULL) && (ent->owner == 0)) ||
6108 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006109 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006110
6111 /*
6112 * when operating on a reader, the entities definitions
6113 * are always owning the entities subtree.
6114 if (ctxt->parseMode == XML_PARSE_READER)
6115 ent->owner = 1;
6116 */
6117
Daniel Veillard62f313b2001-07-04 19:49:14 +00006118 cur = ent->children;
6119 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006120 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006121 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006122 if (nw->_private == NULL)
6123 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006124 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006125 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006126 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006127 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006128 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006129 if (cur == ent->last) {
6130 /*
6131 * needed to detect some strange empty
6132 * node cases in the reader tests
6133 */
6134 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006135 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006136 (nw->type == XML_ELEMENT_NODE) &&
6137 (nw->children == NULL))
6138 nw->extra = 1;
6139
Daniel Veillard62f313b2001-07-04 19:49:14 +00006140 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006141 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006142 cur = cur->next;
6143 }
Daniel Veillard81273902003-09-30 00:43:48 +00006144#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006145 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006146 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006147#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006148 } else if (list == NULL) {
6149 xmlNodePtr nw = NULL, cur, next, last,
6150 firstChild = NULL;
6151 /*
6152 * Copy the entity child list and make it the new
6153 * entity child list. The goal is to make sure any
6154 * ID or REF referenced will be the one from the
6155 * document content and not the entity copy.
6156 */
6157 cur = ent->children;
6158 ent->children = NULL;
6159 last = ent->last;
6160 ent->last = NULL;
6161 while (cur != NULL) {
6162 next = cur->next;
6163 cur->next = NULL;
6164 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006165 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006166 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006167 if (nw->_private == NULL)
6168 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006169 if (firstChild == NULL){
6170 firstChild = cur;
6171 }
6172 xmlAddChild((xmlNodePtr) ent, nw);
6173 xmlAddChild(ctxt->node, cur);
6174 }
6175 if (cur == last)
6176 break;
6177 cur = next;
6178 }
6179 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006180#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006181 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6182 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006183#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006184 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006185 const xmlChar *nbktext;
6186
Daniel Veillard62f313b2001-07-04 19:49:14 +00006187 /*
6188 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006189 * node with a possible previous text one which
6190 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006191 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006192 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6193 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006194 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006195 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006196 if ((ent->last != ent->children) &&
6197 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006198 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006199 xmlAddChildList(ctxt->node, ent->children);
6200 }
6201
Owen Taylor3473f882001-02-23 17:55:21 +00006202 /*
6203 * This is to avoid a nasty side effect, see
6204 * characters() in SAX.c
6205 */
6206 ctxt->nodemem = 0;
6207 ctxt->nodelen = 0;
6208 return;
6209 } else {
6210 /*
6211 * Probably running in SAX mode
6212 */
6213 xmlParserInputPtr input;
6214
6215 input = xmlNewEntityInputStream(ctxt, ent);
6216 xmlPushInput(ctxt, input);
6217 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006218 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6219 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006220 xmlParseTextDecl(ctxt);
6221 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6222 /*
6223 * The XML REC instructs us to stop parsing right here
6224 */
6225 ctxt->instate = XML_PARSER_EOF;
6226 return;
6227 }
6228 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006229 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6230 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006231 }
6232 }
6233 return;
6234 }
6235 }
6236 } else {
6237 val = ent->content;
6238 if (val == NULL) return;
6239 /*
6240 * inline the entity.
6241 */
6242 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6243 (!ctxt->disableSAX))
6244 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6245 }
6246 }
6247}
6248
6249/**
6250 * xmlParseEntityRef:
6251 * @ctxt: an XML parser context
6252 *
6253 * parse ENTITY references declarations
6254 *
6255 * [68] EntityRef ::= '&' Name ';'
6256 *
6257 * [ WFC: Entity Declared ]
6258 * In a document without any DTD, a document with only an internal DTD
6259 * subset which contains no parameter entity references, or a document
6260 * with "standalone='yes'", the Name given in the entity reference
6261 * must match that in an entity declaration, except that well-formed
6262 * documents need not declare any of the following entities: amp, lt,
6263 * gt, apos, quot. The declaration of a parameter entity must precede
6264 * any reference to it. Similarly, the declaration of a general entity
6265 * must precede any reference to it which appears in a default value in an
6266 * attribute-list declaration. Note that if entities are declared in the
6267 * external subset or in external parameter entities, a non-validating
6268 * processor is not obligated to read and process their declarations;
6269 * for such documents, the rule that an entity must be declared is a
6270 * well-formedness constraint only if standalone='yes'.
6271 *
6272 * [ WFC: Parsed Entity ]
6273 * An entity reference must not contain the name of an unparsed entity
6274 *
6275 * Returns the xmlEntityPtr if found, or NULL otherwise.
6276 */
6277xmlEntityPtr
6278xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006279 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006280 xmlEntityPtr ent = NULL;
6281
6282 GROW;
6283
6284 if (RAW == '&') {
6285 NEXT;
6286 name = xmlParseName(ctxt);
6287 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6289 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006290 } else {
6291 if (RAW == ';') {
6292 NEXT;
6293 /*
6294 * Ask first SAX for entity resolution, otherwise try the
6295 * predefined set.
6296 */
6297 if (ctxt->sax != NULL) {
6298 if (ctxt->sax->getEntity != NULL)
6299 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006300 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006301 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006302 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6303 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006304 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006305 }
Owen Taylor3473f882001-02-23 17:55:21 +00006306 }
6307 /*
6308 * [ WFC: Entity Declared ]
6309 * In a document without any DTD, a document with only an
6310 * internal DTD subset which contains no parameter entity
6311 * references, or a document with "standalone='yes'", the
6312 * Name given in the entity reference must match that in an
6313 * entity declaration, except that well-formed documents
6314 * need not declare any of the following entities: amp, lt,
6315 * gt, apos, quot.
6316 * The declaration of a parameter entity must precede any
6317 * reference to it.
6318 * Similarly, the declaration of a general entity must
6319 * precede any reference to it which appears in a default
6320 * value in an attribute-list declaration. Note that if
6321 * entities are declared in the external subset or in
6322 * external parameter entities, a non-validating processor
6323 * is not obligated to read and process their declarations;
6324 * for such documents, the rule that an entity must be
6325 * declared is a well-formedness constraint only if
6326 * standalone='yes'.
6327 */
6328 if (ent == NULL) {
6329 if ((ctxt->standalone == 1) ||
6330 ((ctxt->hasExternalSubset == 0) &&
6331 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006333 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006334 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006335 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006336 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006337 if ((ctxt->inSubset == 0) &&
6338 (ctxt->sax != NULL) &&
6339 (ctxt->sax->reference != NULL)) {
6340 ctxt->sax->reference(ctxt, name);
6341 }
Owen Taylor3473f882001-02-23 17:55:21 +00006342 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006343 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006344 }
6345
6346 /*
6347 * [ WFC: Parsed Entity ]
6348 * An entity reference must not contain the name of an
6349 * unparsed entity
6350 */
6351 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006352 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006353 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006354 }
6355
6356 /*
6357 * [ WFC: No External Entity References ]
6358 * Attribute values cannot contain direct or indirect
6359 * entity references to external entities.
6360 */
6361 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6362 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6364 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006365 }
6366 /*
6367 * [ WFC: No < in Attribute Values ]
6368 * The replacement text of any entity referred to directly or
6369 * indirectly in an attribute value (other than "&lt;") must
6370 * not contain a <.
6371 */
6372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6373 (ent != NULL) &&
6374 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6375 (ent->content != NULL) &&
6376 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006377 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006378 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006379 }
6380
6381 /*
6382 * Internal check, no parameter entities here ...
6383 */
6384 else {
6385 switch (ent->etype) {
6386 case XML_INTERNAL_PARAMETER_ENTITY:
6387 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006388 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6389 "Attempt to reference the parameter entity '%s'\n",
6390 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006391 break;
6392 default:
6393 break;
6394 }
6395 }
6396
6397 /*
6398 * [ WFC: No Recursion ]
6399 * A parsed entity must not contain a recursive reference
6400 * to itself, either directly or indirectly.
6401 * Done somewhere else
6402 */
6403
6404 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006405 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006406 }
Owen Taylor3473f882001-02-23 17:55:21 +00006407 }
6408 }
6409 return(ent);
6410}
6411
6412/**
6413 * xmlParseStringEntityRef:
6414 * @ctxt: an XML parser context
6415 * @str: a pointer to an index in the string
6416 *
6417 * parse ENTITY references declarations, but this version parses it from
6418 * a string value.
6419 *
6420 * [68] EntityRef ::= '&' Name ';'
6421 *
6422 * [ WFC: Entity Declared ]
6423 * In a document without any DTD, a document with only an internal DTD
6424 * subset which contains no parameter entity references, or a document
6425 * with "standalone='yes'", the Name given in the entity reference
6426 * must match that in an entity declaration, except that well-formed
6427 * documents need not declare any of the following entities: amp, lt,
6428 * gt, apos, quot. The declaration of a parameter entity must precede
6429 * any reference to it. Similarly, the declaration of a general entity
6430 * must precede any reference to it which appears in a default value in an
6431 * attribute-list declaration. Note that if entities are declared in the
6432 * external subset or in external parameter entities, a non-validating
6433 * processor is not obligated to read and process their declarations;
6434 * for such documents, the rule that an entity must be declared is a
6435 * well-formedness constraint only if standalone='yes'.
6436 *
6437 * [ WFC: Parsed Entity ]
6438 * An entity reference must not contain the name of an unparsed entity
6439 *
6440 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6441 * is updated to the current location in the string.
6442 */
6443xmlEntityPtr
6444xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6445 xmlChar *name;
6446 const xmlChar *ptr;
6447 xmlChar cur;
6448 xmlEntityPtr ent = NULL;
6449
6450 if ((str == NULL) || (*str == NULL))
6451 return(NULL);
6452 ptr = *str;
6453 cur = *ptr;
6454 if (cur == '&') {
6455 ptr++;
6456 cur = *ptr;
6457 name = xmlParseStringName(ctxt, &ptr);
6458 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006459 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6460 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006461 } else {
6462 if (*ptr == ';') {
6463 ptr++;
6464 /*
6465 * Ask first SAX for entity resolution, otherwise try the
6466 * predefined set.
6467 */
6468 if (ctxt->sax != NULL) {
6469 if (ctxt->sax->getEntity != NULL)
6470 ent = ctxt->sax->getEntity(ctxt->userData, name);
6471 if (ent == NULL)
6472 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006473 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006474 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006475 }
Owen Taylor3473f882001-02-23 17:55:21 +00006476 }
6477 /*
6478 * [ WFC: Entity Declared ]
6479 * In a document without any DTD, a document with only an
6480 * internal DTD subset which contains no parameter entity
6481 * references, or a document with "standalone='yes'", the
6482 * Name given in the entity reference must match that in an
6483 * entity declaration, except that well-formed documents
6484 * need not declare any of the following entities: amp, lt,
6485 * gt, apos, quot.
6486 * The declaration of a parameter entity must precede any
6487 * reference to it.
6488 * Similarly, the declaration of a general entity must
6489 * precede any reference to it which appears in a default
6490 * value in an attribute-list declaration. Note that if
6491 * entities are declared in the external subset or in
6492 * external parameter entities, a non-validating processor
6493 * is not obligated to read and process their declarations;
6494 * for such documents, the rule that an entity must be
6495 * declared is a well-formedness constraint only if
6496 * standalone='yes'.
6497 */
6498 if (ent == NULL) {
6499 if ((ctxt->standalone == 1) ||
6500 ((ctxt->hasExternalSubset == 0) &&
6501 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006502 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006503 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006504 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006505 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006506 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006507 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006508 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006509 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006510 }
6511
6512 /*
6513 * [ WFC: Parsed Entity ]
6514 * An entity reference must not contain the name of an
6515 * unparsed entity
6516 */
6517 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006518 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006519 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006520 }
6521
6522 /*
6523 * [ WFC: No External Entity References ]
6524 * Attribute values cannot contain direct or indirect
6525 * entity references to external entities.
6526 */
6527 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6528 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006529 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006530 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006531 }
6532 /*
6533 * [ WFC: No < in Attribute Values ]
6534 * The replacement text of any entity referred to directly or
6535 * indirectly in an attribute value (other than "&lt;") must
6536 * not contain a <.
6537 */
6538 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6539 (ent != NULL) &&
6540 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6541 (ent->content != NULL) &&
6542 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006543 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6544 "'<' in entity '%s' is not allowed in attributes values\n",
6545 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006546 }
6547
6548 /*
6549 * Internal check, no parameter entities here ...
6550 */
6551 else {
6552 switch (ent->etype) {
6553 case XML_INTERNAL_PARAMETER_ENTITY:
6554 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006555 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6556 "Attempt to reference the parameter entity '%s'\n",
6557 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006558 break;
6559 default:
6560 break;
6561 }
6562 }
6563
6564 /*
6565 * [ WFC: No Recursion ]
6566 * A parsed entity must not contain a recursive reference
6567 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006568 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006569 */
6570
6571 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006572 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006573 }
6574 xmlFree(name);
6575 }
6576 }
6577 *str = ptr;
6578 return(ent);
6579}
6580
6581/**
6582 * xmlParsePEReference:
6583 * @ctxt: an XML parser context
6584 *
6585 * parse PEReference declarations
6586 * The entity content is handled directly by pushing it's content as
6587 * a new input stream.
6588 *
6589 * [69] PEReference ::= '%' Name ';'
6590 *
6591 * [ WFC: No Recursion ]
6592 * A parsed entity must not contain a recursive
6593 * reference to itself, either directly or indirectly.
6594 *
6595 * [ WFC: Entity Declared ]
6596 * In a document without any DTD, a document with only an internal DTD
6597 * subset which contains no parameter entity references, or a document
6598 * with "standalone='yes'", ... ... The declaration of a parameter
6599 * entity must precede any reference to it...
6600 *
6601 * [ VC: Entity Declared ]
6602 * In a document with an external subset or external parameter entities
6603 * with "standalone='no'", ... ... The declaration of a parameter entity
6604 * must precede any reference to it...
6605 *
6606 * [ WFC: In DTD ]
6607 * Parameter-entity references may only appear in the DTD.
6608 * NOTE: misleading but this is handled.
6609 */
6610void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006611xmlParsePEReference(xmlParserCtxtPtr ctxt)
6612{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006613 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006614 xmlEntityPtr entity = NULL;
6615 xmlParserInputPtr input;
6616
6617 if (RAW == '%') {
6618 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006619 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006620 if (name == NULL) {
6621 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6622 "xmlParsePEReference: no name\n");
6623 } else {
6624 if (RAW == ';') {
6625 NEXT;
6626 if ((ctxt->sax != NULL) &&
6627 (ctxt->sax->getParameterEntity != NULL))
6628 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6629 name);
6630 if (entity == NULL) {
6631 /*
6632 * [ WFC: Entity Declared ]
6633 * In a document without any DTD, a document with only an
6634 * internal DTD subset which contains no parameter entity
6635 * references, or a document with "standalone='yes'", ...
6636 * ... The declaration of a parameter entity must precede
6637 * any reference to it...
6638 */
6639 if ((ctxt->standalone == 1) ||
6640 ((ctxt->hasExternalSubset == 0) &&
6641 (ctxt->hasPErefs == 0))) {
6642 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6643 "PEReference: %%%s; not found\n",
6644 name);
6645 } else {
6646 /*
6647 * [ VC: Entity Declared ]
6648 * In a document with an external subset or external
6649 * parameter entities with "standalone='no'", ...
6650 * ... The declaration of a parameter entity must
6651 * precede any reference to it...
6652 */
6653 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6654 "PEReference: %%%s; not found\n",
6655 name, NULL);
6656 ctxt->valid = 0;
6657 }
6658 } else {
6659 /*
6660 * Internal checking in case the entity quest barfed
6661 */
6662 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6663 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6664 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6665 "Internal: %%%s; is not a parameter entity\n",
6666 name, NULL);
6667 } else if (ctxt->input->free != deallocblankswrapper) {
6668 input =
6669 xmlNewBlanksWrapperInputStream(ctxt, entity);
6670 xmlPushInput(ctxt, input);
6671 } else {
6672 /*
6673 * TODO !!!
6674 * handle the extra spaces added before and after
6675 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6676 */
6677 input = xmlNewEntityInputStream(ctxt, entity);
6678 xmlPushInput(ctxt, input);
6679 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006680 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006681 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006682 xmlParseTextDecl(ctxt);
6683 if (ctxt->errNo ==
6684 XML_ERR_UNSUPPORTED_ENCODING) {
6685 /*
6686 * The XML REC instructs us to stop parsing
6687 * right here
6688 */
6689 ctxt->instate = XML_PARSER_EOF;
6690 return;
6691 }
6692 }
6693 }
6694 }
6695 ctxt->hasPErefs = 1;
6696 } else {
6697 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6698 }
6699 }
Owen Taylor3473f882001-02-23 17:55:21 +00006700 }
6701}
6702
6703/**
6704 * xmlParseStringPEReference:
6705 * @ctxt: an XML parser context
6706 * @str: a pointer to an index in the string
6707 *
6708 * parse PEReference declarations
6709 *
6710 * [69] PEReference ::= '%' Name ';'
6711 *
6712 * [ WFC: No Recursion ]
6713 * A parsed entity must not contain a recursive
6714 * reference to itself, either directly or indirectly.
6715 *
6716 * [ WFC: Entity Declared ]
6717 * In a document without any DTD, a document with only an internal DTD
6718 * subset which contains no parameter entity references, or a document
6719 * with "standalone='yes'", ... ... The declaration of a parameter
6720 * entity must precede any reference to it...
6721 *
6722 * [ VC: Entity Declared ]
6723 * In a document with an external subset or external parameter entities
6724 * with "standalone='no'", ... ... The declaration of a parameter entity
6725 * must precede any reference to it...
6726 *
6727 * [ WFC: In DTD ]
6728 * Parameter-entity references may only appear in the DTD.
6729 * NOTE: misleading but this is handled.
6730 *
6731 * Returns the string of the entity content.
6732 * str is updated to the current value of the index
6733 */
6734xmlEntityPtr
6735xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6736 const xmlChar *ptr;
6737 xmlChar cur;
6738 xmlChar *name;
6739 xmlEntityPtr entity = NULL;
6740
6741 if ((str == NULL) || (*str == NULL)) return(NULL);
6742 ptr = *str;
6743 cur = *ptr;
6744 if (cur == '%') {
6745 ptr++;
6746 cur = *ptr;
6747 name = xmlParseStringName(ctxt, &ptr);
6748 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006749 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6750 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006751 } else {
6752 cur = *ptr;
6753 if (cur == ';') {
6754 ptr++;
6755 cur = *ptr;
6756 if ((ctxt->sax != NULL) &&
6757 (ctxt->sax->getParameterEntity != NULL))
6758 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6759 name);
6760 if (entity == NULL) {
6761 /*
6762 * [ WFC: Entity Declared ]
6763 * In a document without any DTD, a document with only an
6764 * internal DTD subset which contains no parameter entity
6765 * references, or a document with "standalone='yes'", ...
6766 * ... The declaration of a parameter entity must precede
6767 * any reference to it...
6768 */
6769 if ((ctxt->standalone == 1) ||
6770 ((ctxt->hasExternalSubset == 0) &&
6771 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006772 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006773 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006774 } else {
6775 /*
6776 * [ VC: Entity Declared ]
6777 * In a document with an external subset or external
6778 * parameter entities with "standalone='no'", ...
6779 * ... The declaration of a parameter entity must
6780 * precede any reference to it...
6781 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006782 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6783 "PEReference: %%%s; not found\n",
6784 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006785 ctxt->valid = 0;
6786 }
6787 } else {
6788 /*
6789 * Internal checking in case the entity quest barfed
6790 */
6791 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6792 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006793 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6794 "%%%s; is not a parameter entity\n",
6795 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006796 }
6797 }
6798 ctxt->hasPErefs = 1;
6799 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006800 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006801 }
6802 xmlFree(name);
6803 }
6804 }
6805 *str = ptr;
6806 return(entity);
6807}
6808
6809/**
6810 * xmlParseDocTypeDecl:
6811 * @ctxt: an XML parser context
6812 *
6813 * parse a DOCTYPE declaration
6814 *
6815 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6816 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6817 *
6818 * [ VC: Root Element Type ]
6819 * The Name in the document type declaration must match the element
6820 * type of the root element.
6821 */
6822
6823void
6824xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006825 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006826 xmlChar *ExternalID = NULL;
6827 xmlChar *URI = NULL;
6828
6829 /*
6830 * We know that '<!DOCTYPE' has been detected.
6831 */
6832 SKIP(9);
6833
6834 SKIP_BLANKS;
6835
6836 /*
6837 * Parse the DOCTYPE name.
6838 */
6839 name = xmlParseName(ctxt);
6840 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006841 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6842 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006843 }
6844 ctxt->intSubName = name;
6845
6846 SKIP_BLANKS;
6847
6848 /*
6849 * Check for SystemID and ExternalID
6850 */
6851 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6852
6853 if ((URI != NULL) || (ExternalID != NULL)) {
6854 ctxt->hasExternalSubset = 1;
6855 }
6856 ctxt->extSubURI = URI;
6857 ctxt->extSubSystem = ExternalID;
6858
6859 SKIP_BLANKS;
6860
6861 /*
6862 * Create and update the internal subset.
6863 */
6864 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6865 (!ctxt->disableSAX))
6866 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6867
6868 /*
6869 * Is there any internal subset declarations ?
6870 * they are handled separately in xmlParseInternalSubset()
6871 */
6872 if (RAW == '[')
6873 return;
6874
6875 /*
6876 * We should be at the end of the DOCTYPE declaration.
6877 */
6878 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006879 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006880 }
6881 NEXT;
6882}
6883
6884/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006885 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006886 * @ctxt: an XML parser context
6887 *
6888 * parse the internal subset declaration
6889 *
6890 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6891 */
6892
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006893static void
Owen Taylor3473f882001-02-23 17:55:21 +00006894xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6895 /*
6896 * Is there any DTD definition ?
6897 */
6898 if (RAW == '[') {
6899 ctxt->instate = XML_PARSER_DTD;
6900 NEXT;
6901 /*
6902 * Parse the succession of Markup declarations and
6903 * PEReferences.
6904 * Subsequence (markupdecl | PEReference | S)*
6905 */
6906 while (RAW != ']') {
6907 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006908 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006909
6910 SKIP_BLANKS;
6911 xmlParseMarkupDecl(ctxt);
6912 xmlParsePEReference(ctxt);
6913
6914 /*
6915 * Pop-up of finished entities.
6916 */
6917 while ((RAW == 0) && (ctxt->inputNr > 1))
6918 xmlPopInput(ctxt);
6919
6920 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006921 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006922 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006923 break;
6924 }
6925 }
6926 if (RAW == ']') {
6927 NEXT;
6928 SKIP_BLANKS;
6929 }
6930 }
6931
6932 /*
6933 * We should be at the end of the DOCTYPE declaration.
6934 */
6935 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006936 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006937 }
6938 NEXT;
6939}
6940
Daniel Veillard81273902003-09-30 00:43:48 +00006941#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006942/**
6943 * xmlParseAttribute:
6944 * @ctxt: an XML parser context
6945 * @value: a xmlChar ** used to store the value of the attribute
6946 *
6947 * parse an attribute
6948 *
6949 * [41] Attribute ::= Name Eq AttValue
6950 *
6951 * [ WFC: No External Entity References ]
6952 * Attribute values cannot contain direct or indirect entity references
6953 * to external entities.
6954 *
6955 * [ WFC: No < in Attribute Values ]
6956 * The replacement text of any entity referred to directly or indirectly in
6957 * an attribute value (other than "&lt;") must not contain a <.
6958 *
6959 * [ VC: Attribute Value Type ]
6960 * The attribute must have been declared; the value must be of the type
6961 * declared for it.
6962 *
6963 * [25] Eq ::= S? '=' S?
6964 *
6965 * With namespace:
6966 *
6967 * [NS 11] Attribute ::= QName Eq AttValue
6968 *
6969 * Also the case QName == xmlns:??? is handled independently as a namespace
6970 * definition.
6971 *
6972 * Returns the attribute name, and the value in *value.
6973 */
6974
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006975const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006976xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006977 const xmlChar *name;
6978 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006979
6980 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006981 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006982 name = xmlParseName(ctxt);
6983 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006984 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006985 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006986 return(NULL);
6987 }
6988
6989 /*
6990 * read the value
6991 */
6992 SKIP_BLANKS;
6993 if (RAW == '=') {
6994 NEXT;
6995 SKIP_BLANKS;
6996 val = xmlParseAttValue(ctxt);
6997 ctxt->instate = XML_PARSER_CONTENT;
6998 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006999 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007000 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 return(NULL);
7002 }
7003
7004 /*
7005 * Check that xml:lang conforms to the specification
7006 * No more registered as an error, just generate a warning now
7007 * since this was deprecated in XML second edition
7008 */
7009 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7010 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007011 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7012 "Malformed value for xml:lang : %s\n",
7013 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007014 }
7015 }
7016
7017 /*
7018 * Check that xml:space conforms to the specification
7019 */
7020 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7021 if (xmlStrEqual(val, BAD_CAST "default"))
7022 *(ctxt->space) = 0;
7023 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7024 *(ctxt->space) = 1;
7025 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007026 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007027"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007028 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007029 }
7030 }
7031
7032 *value = val;
7033 return(name);
7034}
7035
7036/**
7037 * xmlParseStartTag:
7038 * @ctxt: an XML parser context
7039 *
7040 * parse a start of tag either for rule element or
7041 * EmptyElement. In both case we don't parse the tag closing chars.
7042 *
7043 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7044 *
7045 * [ WFC: Unique Att Spec ]
7046 * No attribute name may appear more than once in the same start-tag or
7047 * empty-element tag.
7048 *
7049 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7050 *
7051 * [ WFC: Unique Att Spec ]
7052 * No attribute name may appear more than once in the same start-tag or
7053 * empty-element tag.
7054 *
7055 * With namespace:
7056 *
7057 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7058 *
7059 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7060 *
7061 * Returns the element name parsed
7062 */
7063
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007064const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007065xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007066 const xmlChar *name;
7067 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007068 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007069 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007070 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007071 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007072 int i;
7073
7074 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007075 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007076
7077 name = xmlParseName(ctxt);
7078 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007079 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007080 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007081 return(NULL);
7082 }
7083
7084 /*
7085 * Now parse the attributes, it ends up with the ending
7086 *
7087 * (S Attribute)* S?
7088 */
7089 SKIP_BLANKS;
7090 GROW;
7091
Daniel Veillard21a0f912001-02-25 19:54:14 +00007092 while ((RAW != '>') &&
7093 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007094 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007095 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007096 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007097
7098 attname = xmlParseAttribute(ctxt, &attvalue);
7099 if ((attname != NULL) && (attvalue != NULL)) {
7100 /*
7101 * [ WFC: Unique Att Spec ]
7102 * No attribute name may appear more than once in the same
7103 * start-tag or empty-element tag.
7104 */
7105 for (i = 0; i < nbatts;i += 2) {
7106 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007107 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007108 xmlFree(attvalue);
7109 goto failed;
7110 }
7111 }
Owen Taylor3473f882001-02-23 17:55:21 +00007112 /*
7113 * Add the pair to atts
7114 */
7115 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007116 maxatts = 22; /* allow for 10 attrs by default */
7117 atts = (const xmlChar **)
7118 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007119 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007120 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007121 if (attvalue != NULL)
7122 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007123 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007124 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007125 ctxt->atts = atts;
7126 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007128 const xmlChar **n;
7129
Owen Taylor3473f882001-02-23 17:55:21 +00007130 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007131 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007132 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007133 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007134 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007135 if (attvalue != NULL)
7136 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007137 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007138 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007139 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007140 ctxt->atts = atts;
7141 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007142 }
7143 atts[nbatts++] = attname;
7144 atts[nbatts++] = attvalue;
7145 atts[nbatts] = NULL;
7146 atts[nbatts + 1] = NULL;
7147 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007148 if (attvalue != NULL)
7149 xmlFree(attvalue);
7150 }
7151
7152failed:
7153
Daniel Veillard3772de32002-12-17 10:31:45 +00007154 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007155 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7156 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007157 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7159 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007160 }
7161 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007162 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7163 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007164 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7165 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007166 break;
7167 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007168 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007169 GROW;
7170 }
7171
7172 /*
7173 * SAX: Start of Element !
7174 */
7175 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007176 (!ctxt->disableSAX)) {
7177 if (nbatts > 0)
7178 ctxt->sax->startElement(ctxt->userData, name, atts);
7179 else
7180 ctxt->sax->startElement(ctxt->userData, name, NULL);
7181 }
Owen Taylor3473f882001-02-23 17:55:21 +00007182
7183 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007184 /* Free only the content strings */
7185 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007186 if (atts[i] != NULL)
7187 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007188 }
7189 return(name);
7190}
7191
7192/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007193 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007194 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007195 * @line: line of the start tag
7196 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007197 *
7198 * parse an end of tag
7199 *
7200 * [42] ETag ::= '</' Name S? '>'
7201 *
7202 * With namespace
7203 *
7204 * [NS 9] ETag ::= '</' QName S? '>'
7205 */
7206
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007207static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007208xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007209 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007210
7211 GROW;
7212 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007213 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007214 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007215 return;
7216 }
7217 SKIP(2);
7218
Daniel Veillard46de64e2002-05-29 08:21:33 +00007219 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007220
7221 /*
7222 * We should definitely be at the ending "S? '>'" part
7223 */
7224 GROW;
7225 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007226 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007227 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007228 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007229 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007230
7231 /*
7232 * [ WFC: Element Type Match ]
7233 * The Name in an element's end-tag must match the element type in the
7234 * start-tag.
7235 *
7236 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007237 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007238 if (name == NULL) name = BAD_CAST "unparseable";
7239 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007240 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007241 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007242 }
7243
7244 /*
7245 * SAX: End of Tag
7246 */
7247 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7248 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007249 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007250
Daniel Veillarde57ec792003-09-10 10:50:59 +00007251 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007252 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007253 return;
7254}
7255
7256/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007257 * xmlParseEndTag:
7258 * @ctxt: an XML parser context
7259 *
7260 * parse an end of tag
7261 *
7262 * [42] ETag ::= '</' Name S? '>'
7263 *
7264 * With namespace
7265 *
7266 * [NS 9] ETag ::= '</' QName S? '>'
7267 */
7268
7269void
7270xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007271 xmlParseEndTag1(ctxt, 0);
7272}
Daniel Veillard81273902003-09-30 00:43:48 +00007273#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274
7275/************************************************************************
7276 * *
7277 * SAX 2 specific operations *
7278 * *
7279 ************************************************************************/
7280
7281static const xmlChar *
7282xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7283 int len = 0, l;
7284 int c;
7285 int count = 0;
7286
7287 /*
7288 * Handler for more complex cases
7289 */
7290 GROW;
7291 c = CUR_CHAR(l);
7292 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007293 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007294 return(NULL);
7295 }
7296
7297 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007298 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007299 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007300 (IS_COMBINING(c)) ||
7301 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007302 if (count++ > 100) {
7303 count = 0;
7304 GROW;
7305 }
7306 len += l;
7307 NEXTL(l);
7308 c = CUR_CHAR(l);
7309 }
7310 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7311}
7312
7313/*
7314 * xmlGetNamespace:
7315 * @ctxt: an XML parser context
7316 * @prefix: the prefix to lookup
7317 *
7318 * Lookup the namespace name for the @prefix (which ca be NULL)
7319 * The prefix must come from the @ctxt->dict dictionnary
7320 *
7321 * Returns the namespace name or NULL if not bound
7322 */
7323static const xmlChar *
7324xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7325 int i;
7326
Daniel Veillarde57ec792003-09-10 10:50:59 +00007327 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007328 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007329 if (ctxt->nsTab[i] == prefix) {
7330 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7331 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007332 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007333 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 return(NULL);
7335}
7336
7337/**
7338 * xmlParseNCName:
7339 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007340 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007341 *
7342 * parse an XML name.
7343 *
7344 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7345 * CombiningChar | Extender
7346 *
7347 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7348 *
7349 * Returns the Name parsed or NULL
7350 */
7351
7352static const xmlChar *
7353xmlParseNCName(xmlParserCtxtPtr ctxt) {
7354 const xmlChar *in;
7355 const xmlChar *ret;
7356 int count = 0;
7357
7358 /*
7359 * Accelerator for simple ASCII names
7360 */
7361 in = ctxt->input->cur;
7362 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7363 ((*in >= 0x41) && (*in <= 0x5A)) ||
7364 (*in == '_')) {
7365 in++;
7366 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7367 ((*in >= 0x41) && (*in <= 0x5A)) ||
7368 ((*in >= 0x30) && (*in <= 0x39)) ||
7369 (*in == '_') || (*in == '-') ||
7370 (*in == '.'))
7371 in++;
7372 if ((*in > 0) && (*in < 0x80)) {
7373 count = in - ctxt->input->cur;
7374 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7375 ctxt->input->cur = in;
7376 ctxt->nbChars += count;
7377 ctxt->input->col += count;
7378 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007379 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007380 }
7381 return(ret);
7382 }
7383 }
7384 return(xmlParseNCNameComplex(ctxt));
7385}
7386
7387/**
7388 * xmlParseQName:
7389 * @ctxt: an XML parser context
7390 * @prefix: pointer to store the prefix part
7391 *
7392 * parse an XML Namespace QName
7393 *
7394 * [6] QName ::= (Prefix ':')? LocalPart
7395 * [7] Prefix ::= NCName
7396 * [8] LocalPart ::= NCName
7397 *
7398 * Returns the Name parsed or NULL
7399 */
7400
7401static const xmlChar *
7402xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7403 const xmlChar *l, *p;
7404
7405 GROW;
7406
7407 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007408 if (l == NULL) {
7409 if (CUR == ':') {
7410 l = xmlParseName(ctxt);
7411 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007412 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7413 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007414 *prefix = NULL;
7415 return(l);
7416 }
7417 }
7418 return(NULL);
7419 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007420 if (CUR == ':') {
7421 NEXT;
7422 p = l;
7423 l = xmlParseNCName(ctxt);
7424 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007425 xmlChar *tmp;
7426
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007427 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7428 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007429 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7430 p = xmlDictLookup(ctxt->dict, tmp, -1);
7431 if (tmp != NULL) xmlFree(tmp);
7432 *prefix = NULL;
7433 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007434 }
7435 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007436 xmlChar *tmp;
7437
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007438 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7439 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007440 NEXT;
7441 tmp = (xmlChar *) xmlParseName(ctxt);
7442 if (tmp != NULL) {
7443 tmp = xmlBuildQName(tmp, l, NULL, 0);
7444 l = xmlDictLookup(ctxt->dict, tmp, -1);
7445 if (tmp != NULL) xmlFree(tmp);
7446 *prefix = p;
7447 return(l);
7448 }
7449 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7450 l = xmlDictLookup(ctxt->dict, tmp, -1);
7451 if (tmp != NULL) xmlFree(tmp);
7452 *prefix = p;
7453 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007454 }
7455 *prefix = p;
7456 } else
7457 *prefix = NULL;
7458 return(l);
7459}
7460
7461/**
7462 * xmlParseQNameAndCompare:
7463 * @ctxt: an XML parser context
7464 * @name: the localname
7465 * @prefix: the prefix, if any.
7466 *
7467 * parse an XML name and compares for match
7468 * (specialized for endtag parsing)
7469 *
7470 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7471 * and the name for mismatch
7472 */
7473
7474static const xmlChar *
7475xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7476 xmlChar const *prefix) {
7477 const xmlChar *cmp = name;
7478 const xmlChar *in;
7479 const xmlChar *ret;
7480 const xmlChar *prefix2;
7481
7482 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7483
7484 GROW;
7485 in = ctxt->input->cur;
7486
7487 cmp = prefix;
7488 while (*in != 0 && *in == *cmp) {
7489 ++in;
7490 ++cmp;
7491 }
7492 if ((*cmp == 0) && (*in == ':')) {
7493 in++;
7494 cmp = name;
7495 while (*in != 0 && *in == *cmp) {
7496 ++in;
7497 ++cmp;
7498 }
William M. Brack76e95df2003-10-18 16:20:14 +00007499 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007500 /* success */
7501 ctxt->input->cur = in;
7502 return((const xmlChar*) 1);
7503 }
7504 }
7505 /*
7506 * all strings coms from the dictionary, equality can be done directly
7507 */
7508 ret = xmlParseQName (ctxt, &prefix2);
7509 if ((ret == name) && (prefix == prefix2))
7510 return((const xmlChar*) 1);
7511 return ret;
7512}
7513
7514/**
7515 * xmlParseAttValueInternal:
7516 * @ctxt: an XML parser context
7517 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007518 * @alloc: whether the attribute was reallocated as a new string
7519 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007520 *
7521 * parse a value for an attribute.
7522 * NOTE: if no normalization is needed, the routine will return pointers
7523 * directly from the data buffer.
7524 *
7525 * 3.3.3 Attribute-Value Normalization:
7526 * Before the value of an attribute is passed to the application or
7527 * checked for validity, the XML processor must normalize it as follows:
7528 * - a character reference is processed by appending the referenced
7529 * character to the attribute value
7530 * - an entity reference is processed by recursively processing the
7531 * replacement text of the entity
7532 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7533 * appending #x20 to the normalized value, except that only a single
7534 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7535 * parsed entity or the literal entity value of an internal parsed entity
7536 * - other characters are processed by appending them to the normalized value
7537 * If the declared value is not CDATA, then the XML processor must further
7538 * process the normalized attribute value by discarding any leading and
7539 * trailing space (#x20) characters, and by replacing sequences of space
7540 * (#x20) characters by a single space (#x20) character.
7541 * All attributes for which no declaration has been read should be treated
7542 * by a non-validating parser as if declared CDATA.
7543 *
7544 * Returns the AttValue parsed or NULL. The value has to be freed by the
7545 * caller if it was copied, this can be detected by val[*len] == 0.
7546 */
7547
7548static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007549xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7550 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007551{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007552 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007553 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007554 xmlChar *ret = NULL;
7555
7556 GROW;
7557 in = (xmlChar *) CUR_PTR;
7558 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007559 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007560 return (NULL);
7561 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007562 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007563
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007564 /*
7565 * try to handle in this routine the most common case where no
7566 * allocation of a new string is required and where content is
7567 * pure ASCII.
7568 */
7569 limit = *in++;
7570 end = ctxt->input->end;
7571 start = in;
7572 if (in >= end) {
7573 const xmlChar *oldbase = ctxt->input->base;
7574 GROW;
7575 if (oldbase != ctxt->input->base) {
7576 long delta = ctxt->input->base - oldbase;
7577 start = start + delta;
7578 in = in + delta;
7579 }
7580 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007581 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007582 if (normalize) {
7583 /*
7584 * Skip any leading spaces
7585 */
7586 while ((in < end) && (*in != limit) &&
7587 ((*in == 0x20) || (*in == 0x9) ||
7588 (*in == 0xA) || (*in == 0xD))) {
7589 in++;
7590 start = in;
7591 if (in >= end) {
7592 const xmlChar *oldbase = ctxt->input->base;
7593 GROW;
7594 if (oldbase != ctxt->input->base) {
7595 long delta = ctxt->input->base - oldbase;
7596 start = start + delta;
7597 in = in + delta;
7598 }
7599 end = ctxt->input->end;
7600 }
7601 }
7602 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7603 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7604 if ((*in++ == 0x20) && (*in == 0x20)) break;
7605 if (in >= end) {
7606 const xmlChar *oldbase = ctxt->input->base;
7607 GROW;
7608 if (oldbase != ctxt->input->base) {
7609 long delta = ctxt->input->base - oldbase;
7610 start = start + delta;
7611 in = in + delta;
7612 }
7613 end = ctxt->input->end;
7614 }
7615 }
7616 last = in;
7617 /*
7618 * skip the trailing blanks
7619 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007620 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007621 while ((in < end) && (*in != limit) &&
7622 ((*in == 0x20) || (*in == 0x9) ||
7623 (*in == 0xA) || (*in == 0xD))) {
7624 in++;
7625 if (in >= end) {
7626 const xmlChar *oldbase = ctxt->input->base;
7627 GROW;
7628 if (oldbase != ctxt->input->base) {
7629 long delta = ctxt->input->base - oldbase;
7630 start = start + delta;
7631 in = in + delta;
7632 last = last + delta;
7633 }
7634 end = ctxt->input->end;
7635 }
7636 }
7637 if (*in != limit) goto need_complex;
7638 } else {
7639 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7640 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7641 in++;
7642 if (in >= end) {
7643 const xmlChar *oldbase = ctxt->input->base;
7644 GROW;
7645 if (oldbase != ctxt->input->base) {
7646 long delta = ctxt->input->base - oldbase;
7647 start = start + delta;
7648 in = in + delta;
7649 }
7650 end = ctxt->input->end;
7651 }
7652 }
7653 last = in;
7654 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007655 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007656 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007657 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007658 *len = last - start;
7659 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007660 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007661 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007662 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 }
7664 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007665 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007666 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007667need_complex:
7668 if (alloc) *alloc = 1;
7669 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007670}
7671
7672/**
7673 * xmlParseAttribute2:
7674 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007675 * @pref: the element prefix
7676 * @elem: the element name
7677 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007678 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007679 * @len: an int * to save the length of the attribute
7680 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007681 *
7682 * parse an attribute in the new SAX2 framework.
7683 *
7684 * Returns the attribute name, and the value in *value, .
7685 */
7686
7687static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007688xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7689 const xmlChar *pref, const xmlChar *elem,
7690 const xmlChar **prefix, xmlChar **value,
7691 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007692 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007693 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007694 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007695
7696 *value = NULL;
7697 GROW;
7698 name = xmlParseQName(ctxt, prefix);
7699 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007700 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7701 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 return(NULL);
7703 }
7704
7705 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007706 * get the type if needed
7707 */
7708 if (ctxt->attsSpecial != NULL) {
7709 int type;
7710
7711 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7712 pref, elem, *prefix, name);
7713 if (type != 0) normalize = 1;
7714 }
7715
7716 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007717 * read the value
7718 */
7719 SKIP_BLANKS;
7720 if (RAW == '=') {
7721 NEXT;
7722 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007723 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007724 ctxt->instate = XML_PARSER_CONTENT;
7725 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007726 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007727 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007728 return(NULL);
7729 }
7730
Daniel Veillardd8925572005-06-08 22:34:55 +00007731 if (*prefix == ctxt->str_xml) {
7732 /*
7733 * Check that xml:lang conforms to the specification
7734 * No more registered as an error, just generate a warning now
7735 * since this was deprecated in XML second edition
7736 */
7737 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7738 internal_val = xmlStrndup(val, *len);
7739 if (!xmlCheckLanguageID(internal_val)) {
7740 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7741 "Malformed value for xml:lang : %s\n",
7742 internal_val, NULL);
7743 }
7744 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007745
Daniel Veillardd8925572005-06-08 22:34:55 +00007746 /*
7747 * Check that xml:space conforms to the specification
7748 */
7749 if (xmlStrEqual(name, BAD_CAST "space")) {
7750 internal_val = xmlStrndup(val, *len);
7751 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7752 *(ctxt->space) = 0;
7753 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7754 *(ctxt->space) = 1;
7755 else {
7756 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007757"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007758 internal_val, NULL);
7759 }
7760 }
7761 if (internal_val) {
7762 xmlFree(internal_val);
7763 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007764 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007765
7766 *value = val;
7767 return(name);
7768}
7769
7770/**
7771 * xmlParseStartTag2:
7772 * @ctxt: an XML parser context
7773 *
7774 * parse a start of tag either for rule element or
7775 * EmptyElement. In both case we don't parse the tag closing chars.
7776 * This routine is called when running SAX2 parsing
7777 *
7778 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7779 *
7780 * [ WFC: Unique Att Spec ]
7781 * No attribute name may appear more than once in the same start-tag or
7782 * empty-element tag.
7783 *
7784 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7785 *
7786 * [ WFC: Unique Att Spec ]
7787 * No attribute name may appear more than once in the same start-tag or
7788 * empty-element tag.
7789 *
7790 * With namespace:
7791 *
7792 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7793 *
7794 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7795 *
7796 * Returns the element name parsed
7797 */
7798
7799static const xmlChar *
7800xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007801 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007802 const xmlChar *localname;
7803 const xmlChar *prefix;
7804 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007805 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007806 const xmlChar *nsname;
7807 xmlChar *attvalue;
7808 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007809 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007810 int nratts, nbatts, nbdef;
7811 int i, j, nbNs, attval;
7812 const xmlChar *base;
7813 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007814 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815
7816 if (RAW != '<') return(NULL);
7817 NEXT1;
7818
7819 /*
7820 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7821 * point since the attribute values may be stored as pointers to
7822 * the buffer and calling SHRINK would destroy them !
7823 * The Shrinking is only possible once the full set of attribute
7824 * callbacks have been done.
7825 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007826reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007827 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007828 base = ctxt->input->base;
7829 cur = ctxt->input->cur - ctxt->input->base;
7830 nbatts = 0;
7831 nratts = 0;
7832 nbdef = 0;
7833 nbNs = 0;
7834 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007835 /* Forget any namespaces added during an earlier parse of this element. */
7836 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007837
7838 localname = xmlParseQName(ctxt, &prefix);
7839 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007840 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7841 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007842 return(NULL);
7843 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007844 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007845
7846 /*
7847 * Now parse the attributes, it ends up with the ending
7848 *
7849 * (S Attribute)* S?
7850 */
7851 SKIP_BLANKS;
7852 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007853 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007854
7855 while ((RAW != '>') &&
7856 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007857 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007858 const xmlChar *q = CUR_PTR;
7859 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007860 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007861
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007862 attname = xmlParseAttribute2(ctxt, prefix, localname,
7863 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007864 if ((attname != NULL) && (attvalue != NULL)) {
7865 if (len < 0) len = xmlStrlen(attvalue);
7866 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007867 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7868 xmlURIPtr uri;
7869
7870 if (*URL != 0) {
7871 uri = xmlParseURI((const char *) URL);
7872 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007873 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7874 "xmlns: %s not a valid URI\n",
7875 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007876 } else {
7877 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007878 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7879 "xmlns: URI %s is not absolute\n",
7880 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007881 }
7882 xmlFreeURI(uri);
7883 }
7884 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007885 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007886 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007887 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007888 for (j = 1;j <= nbNs;j++)
7889 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7890 break;
7891 if (j <= nbNs)
7892 xmlErrAttributeDup(ctxt, NULL, attname);
7893 else
7894 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007895 if (alloc != 0) xmlFree(attvalue);
7896 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007897 continue;
7898 }
7899 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007900 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7901 xmlURIPtr uri;
7902
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007903 if (attname == ctxt->str_xml) {
7904 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007905 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7906 "xml namespace prefix mapped to wrong URI\n",
7907 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007908 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007909 /*
7910 * Do not keep a namespace definition node
7911 */
7912 if (alloc != 0) xmlFree(attvalue);
7913 SKIP_BLANKS;
7914 continue;
7915 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007916 uri = xmlParseURI((const char *) URL);
7917 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007918 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7919 "xmlns:%s: '%s' is not a valid URI\n",
7920 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007921 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007922 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007923 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7924 "xmlns:%s: URI %s is not absolute\n",
7925 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007926 }
7927 xmlFreeURI(uri);
7928 }
7929
Daniel Veillard0fb18932003-09-07 09:14:37 +00007930 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007931 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007932 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007933 for (j = 1;j <= nbNs;j++)
7934 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7935 break;
7936 if (j <= nbNs)
7937 xmlErrAttributeDup(ctxt, aprefix, attname);
7938 else
7939 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007940 if (alloc != 0) xmlFree(attvalue);
7941 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007942 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007943 continue;
7944 }
7945
7946 /*
7947 * Add the pair to atts
7948 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007949 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7950 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007951 if (attvalue[len] == 0)
7952 xmlFree(attvalue);
7953 goto failed;
7954 }
7955 maxatts = ctxt->maxatts;
7956 atts = ctxt->atts;
7957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007958 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007959 atts[nbatts++] = attname;
7960 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007961 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 atts[nbatts++] = attvalue;
7963 attvalue += len;
7964 atts[nbatts++] = attvalue;
7965 /*
7966 * tag if some deallocation is needed
7967 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007968 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007969 } else {
7970 if ((attvalue != NULL) && (attvalue[len] == 0))
7971 xmlFree(attvalue);
7972 }
7973
7974failed:
7975
7976 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007977 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007978 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7979 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007980 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7982 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007983 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007984 }
7985 SKIP_BLANKS;
7986 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7987 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007988 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007989 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990 break;
7991 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007992 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007993 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007994 }
7995
Daniel Veillard0fb18932003-09-07 09:14:37 +00007996 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007997 * The attributes defaulting
7998 */
7999 if (ctxt->attsDefault != NULL) {
8000 xmlDefAttrsPtr defaults;
8001
8002 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8003 if (defaults != NULL) {
8004 for (i = 0;i < defaults->nbAttrs;i++) {
8005 attname = defaults->values[4 * i];
8006 aprefix = defaults->values[4 * i + 1];
8007
8008 /*
8009 * special work for namespaces defaulted defs
8010 */
8011 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8012 /*
8013 * check that it's not a defined namespace
8014 */
8015 for (j = 1;j <= nbNs;j++)
8016 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8017 break;
8018 if (j <= nbNs) continue;
8019
8020 nsname = xmlGetNamespace(ctxt, NULL);
8021 if (nsname != defaults->values[4 * i + 2]) {
8022 if (nsPush(ctxt, NULL,
8023 defaults->values[4 * i + 2]) > 0)
8024 nbNs++;
8025 }
8026 } else if (aprefix == ctxt->str_xmlns) {
8027 /*
8028 * check that it's not a defined namespace
8029 */
8030 for (j = 1;j <= nbNs;j++)
8031 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8032 break;
8033 if (j <= nbNs) continue;
8034
8035 nsname = xmlGetNamespace(ctxt, attname);
8036 if (nsname != defaults->values[2]) {
8037 if (nsPush(ctxt, attname,
8038 defaults->values[4 * i + 2]) > 0)
8039 nbNs++;
8040 }
8041 } else {
8042 /*
8043 * check that it's not a defined attribute
8044 */
8045 for (j = 0;j < nbatts;j+=5) {
8046 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8047 break;
8048 }
8049 if (j < nbatts) continue;
8050
8051 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8052 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008053 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008054 }
8055 maxatts = ctxt->maxatts;
8056 atts = ctxt->atts;
8057 }
8058 atts[nbatts++] = attname;
8059 atts[nbatts++] = aprefix;
8060 if (aprefix == NULL)
8061 atts[nbatts++] = NULL;
8062 else
8063 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8064 atts[nbatts++] = defaults->values[4 * i + 2];
8065 atts[nbatts++] = defaults->values[4 * i + 3];
8066 nbdef++;
8067 }
8068 }
8069 }
8070 }
8071
Daniel Veillarde70c8772003-11-25 07:21:18 +00008072 /*
8073 * The attributes checkings
8074 */
8075 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008076 /*
8077 * The default namespace does not apply to attribute names.
8078 */
8079 if (atts[i + 1] != NULL) {
8080 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8081 if (nsname == NULL) {
8082 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8083 "Namespace prefix %s for %s on %s is not defined\n",
8084 atts[i + 1], atts[i], localname);
8085 }
8086 atts[i + 2] = nsname;
8087 } else
8088 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008089 /*
8090 * [ WFC: Unique Att Spec ]
8091 * No attribute name may appear more than once in the same
8092 * start-tag or empty-element tag.
8093 * As extended by the Namespace in XML REC.
8094 */
8095 for (j = 0; j < i;j += 5) {
8096 if (atts[i] == atts[j]) {
8097 if (atts[i+1] == atts[j+1]) {
8098 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8099 break;
8100 }
8101 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8102 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8103 "Namespaced Attribute %s in '%s' redefined\n",
8104 atts[i], nsname, NULL);
8105 break;
8106 }
8107 }
8108 }
8109 }
8110
Daniel Veillarde57ec792003-09-10 10:50:59 +00008111 nsname = xmlGetNamespace(ctxt, prefix);
8112 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008113 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8114 "Namespace prefix %s on %s is not defined\n",
8115 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008116 }
8117 *pref = prefix;
8118 *URI = nsname;
8119
8120 /*
8121 * SAX: Start of Element !
8122 */
8123 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8124 (!ctxt->disableSAX)) {
8125 if (nbNs > 0)
8126 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8127 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8128 nbatts / 5, nbdef, atts);
8129 else
8130 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8131 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8132 }
8133
8134 /*
8135 * Free up attribute allocated strings if needed
8136 */
8137 if (attval != 0) {
8138 for (i = 3,j = 0; j < nratts;i += 5,j++)
8139 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8140 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008141 }
8142
8143 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008144
8145base_changed:
8146 /*
8147 * the attribute strings are valid iif the base didn't changed
8148 */
8149 if (attval != 0) {
8150 for (i = 3,j = 0; j < nratts;i += 5,j++)
8151 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8152 xmlFree((xmlChar *) atts[i]);
8153 }
8154 ctxt->input->cur = ctxt->input->base + cur;
8155 if (ctxt->wellFormed == 1) {
8156 goto reparse;
8157 }
8158 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008159}
8160
8161/**
8162 * xmlParseEndTag2:
8163 * @ctxt: an XML parser context
8164 * @line: line of the start tag
8165 * @nsNr: number of namespaces on the start tag
8166 *
8167 * parse an end of tag
8168 *
8169 * [42] ETag ::= '</' Name S? '>'
8170 *
8171 * With namespace
8172 *
8173 * [NS 9] ETag ::= '</' QName S? '>'
8174 */
8175
8176static void
8177xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008178 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008179 const xmlChar *name;
8180
8181 GROW;
8182 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008183 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008184 return;
8185 }
8186 SKIP(2);
8187
William M. Brack13dfa872004-09-18 04:52:08 +00008188 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008189 if (ctxt->input->cur[tlen] == '>') {
8190 ctxt->input->cur += tlen + 1;
8191 goto done;
8192 }
8193 ctxt->input->cur += tlen;
8194 name = (xmlChar*)1;
8195 } else {
8196 if (prefix == NULL)
8197 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8198 else
8199 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8200 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008201
8202 /*
8203 * We should definitely be at the ending "S? '>'" part
8204 */
8205 GROW;
8206 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008207 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008208 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 } else
8210 NEXT1;
8211
8212 /*
8213 * [ WFC: Element Type Match ]
8214 * The Name in an element's end-tag must match the element type in the
8215 * start-tag.
8216 *
8217 */
8218 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008219 if (name == NULL) name = BAD_CAST "unparseable";
8220 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008221 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008222 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008223 }
8224
8225 /*
8226 * SAX: End of Tag
8227 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008228done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008229 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8230 (!ctxt->disableSAX))
8231 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8232
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 spacePop(ctxt);
8234 if (nsNr != 0)
8235 nsPop(ctxt, nsNr);
8236 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008237}
8238
8239/**
Owen Taylor3473f882001-02-23 17:55:21 +00008240 * xmlParseCDSect:
8241 * @ctxt: an XML parser context
8242 *
8243 * Parse escaped pure raw content.
8244 *
8245 * [18] CDSect ::= CDStart CData CDEnd
8246 *
8247 * [19] CDStart ::= '<![CDATA['
8248 *
8249 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8250 *
8251 * [21] CDEnd ::= ']]>'
8252 */
8253void
8254xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8255 xmlChar *buf = NULL;
8256 int len = 0;
8257 int size = XML_PARSER_BUFFER_SIZE;
8258 int r, rl;
8259 int s, sl;
8260 int cur, l;
8261 int count = 0;
8262
Daniel Veillard8f597c32003-10-06 08:19:27 +00008263 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008264 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008265 SKIP(9);
8266 } else
8267 return;
8268
8269 ctxt->instate = XML_PARSER_CDATA_SECTION;
8270 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008271 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008272 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008273 ctxt->instate = XML_PARSER_CONTENT;
8274 return;
8275 }
8276 NEXTL(rl);
8277 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008278 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008279 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008280 ctxt->instate = XML_PARSER_CONTENT;
8281 return;
8282 }
8283 NEXTL(sl);
8284 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008285 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008286 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008287 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008288 return;
8289 }
William M. Brack871611b2003-10-18 04:53:14 +00008290 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008291 ((r != ']') || (s != ']') || (cur != '>'))) {
8292 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008293 xmlChar *tmp;
8294
Owen Taylor3473f882001-02-23 17:55:21 +00008295 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008296 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8297 if (tmp == NULL) {
8298 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008299 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008300 return;
8301 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008302 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008303 }
8304 COPY_BUF(rl,buf,len,r);
8305 r = s;
8306 rl = sl;
8307 s = cur;
8308 sl = l;
8309 count++;
8310 if (count > 50) {
8311 GROW;
8312 count = 0;
8313 }
8314 NEXTL(l);
8315 cur = CUR_CHAR(l);
8316 }
8317 buf[len] = 0;
8318 ctxt->instate = XML_PARSER_CONTENT;
8319 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008320 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008321 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008322 xmlFree(buf);
8323 return;
8324 }
8325 NEXTL(l);
8326
8327 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008328 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008329 */
8330 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8331 if (ctxt->sax->cdataBlock != NULL)
8332 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008333 else if (ctxt->sax->characters != NULL)
8334 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008335 }
8336 xmlFree(buf);
8337}
8338
8339/**
8340 * xmlParseContent:
8341 * @ctxt: an XML parser context
8342 *
8343 * Parse a content:
8344 *
8345 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8346 */
8347
8348void
8349xmlParseContent(xmlParserCtxtPtr ctxt) {
8350 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008351 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008352 ((RAW != '<') || (NXT(1) != '/'))) {
8353 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008354 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008355 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008356
8357 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008358 * First case : a Processing Instruction.
8359 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008360 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008361 xmlParsePI(ctxt);
8362 }
8363
8364 /*
8365 * Second case : a CDSection
8366 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008367 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008368 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008369 xmlParseCDSect(ctxt);
8370 }
8371
8372 /*
8373 * Third case : a comment
8374 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008375 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008376 (NXT(2) == '-') && (NXT(3) == '-')) {
8377 xmlParseComment(ctxt);
8378 ctxt->instate = XML_PARSER_CONTENT;
8379 }
8380
8381 /*
8382 * Fourth case : a sub-element.
8383 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008384 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008385 xmlParseElement(ctxt);
8386 }
8387
8388 /*
8389 * Fifth case : a reference. If if has not been resolved,
8390 * parsing returns it's Name, create the node
8391 */
8392
Daniel Veillard21a0f912001-02-25 19:54:14 +00008393 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008394 xmlParseReference(ctxt);
8395 }
8396
8397 /*
8398 * Last case, text. Note that References are handled directly.
8399 */
8400 else {
8401 xmlParseCharData(ctxt, 0);
8402 }
8403
8404 GROW;
8405 /*
8406 * Pop-up of finished entities.
8407 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008408 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008409 xmlPopInput(ctxt);
8410 SHRINK;
8411
Daniel Veillardfdc91562002-07-01 21:52:03 +00008412 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008413 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8414 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008415 ctxt->instate = XML_PARSER_EOF;
8416 break;
8417 }
8418 }
8419}
8420
8421/**
8422 * xmlParseElement:
8423 * @ctxt: an XML parser context
8424 *
8425 * parse an XML element, this is highly recursive
8426 *
8427 * [39] element ::= EmptyElemTag | STag content ETag
8428 *
8429 * [ WFC: Element Type Match ]
8430 * The Name in an element's end-tag must match the element type in the
8431 * start-tag.
8432 *
Owen Taylor3473f882001-02-23 17:55:21 +00008433 */
8434
8435void
8436xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008437 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 const xmlChar *prefix;
8439 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008441 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008442 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008443 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008444
8445 /* Capture start position */
8446 if (ctxt->record_info) {
8447 node_info.begin_pos = ctxt->input->consumed +
8448 (CUR_PTR - ctxt->input->base);
8449 node_info.begin_line = ctxt->input->line;
8450 }
8451
8452 if (ctxt->spaceNr == 0)
8453 spacePush(ctxt, -1);
8454 else
8455 spacePush(ctxt, *ctxt->space);
8456
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008457 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008458#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008459 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008460#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008461 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008462#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008463 else
8464 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008465#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008466 if (name == NULL) {
8467 spacePop(ctxt);
8468 return;
8469 }
8470 namePush(ctxt, name);
8471 ret = ctxt->node;
8472
Daniel Veillard4432df22003-09-28 18:58:27 +00008473#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008474 /*
8475 * [ VC: Root Element Type ]
8476 * The Name in the document type declaration must match the element
8477 * type of the root element.
8478 */
8479 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8480 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8481 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008482#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008483
8484 /*
8485 * Check for an Empty Element.
8486 */
8487 if ((RAW == '/') && (NXT(1) == '>')) {
8488 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008489 if (ctxt->sax2) {
8490 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8491 (!ctxt->disableSAX))
8492 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008493#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494 } else {
8495 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8496 (!ctxt->disableSAX))
8497 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008498#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008499 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008500 namePop(ctxt);
8501 spacePop(ctxt);
8502 if (nsNr != ctxt->nsNr)
8503 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008504 if ( ret != NULL && ctxt->record_info ) {
8505 node_info.end_pos = ctxt->input->consumed +
8506 (CUR_PTR - ctxt->input->base);
8507 node_info.end_line = ctxt->input->line;
8508 node_info.node = ret;
8509 xmlParserAddNodeInfo(ctxt, &node_info);
8510 }
8511 return;
8512 }
8513 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008514 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008515 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008516 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8517 "Couldn't find end of Start Tag %s line %d\n",
8518 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008519
8520 /*
8521 * end of parsing of this node.
8522 */
8523 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008524 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008525 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008526 if (nsNr != ctxt->nsNr)
8527 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008528
8529 /*
8530 * Capture end position and add node
8531 */
8532 if ( ret != NULL && ctxt->record_info ) {
8533 node_info.end_pos = ctxt->input->consumed +
8534 (CUR_PTR - ctxt->input->base);
8535 node_info.end_line = ctxt->input->line;
8536 node_info.node = ret;
8537 xmlParserAddNodeInfo(ctxt, &node_info);
8538 }
8539 return;
8540 }
8541
8542 /*
8543 * Parse the content of the element:
8544 */
8545 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008546 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008547 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008548 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008549 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008550
8551 /*
8552 * end of parsing of this node.
8553 */
8554 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008555 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008556 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008557 if (nsNr != ctxt->nsNr)
8558 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008559 return;
8560 }
8561
8562 /*
8563 * parse the end of tag: '</' should be here.
8564 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008565 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008566 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008567 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008568 }
8569#ifdef LIBXML_SAX1_ENABLED
8570 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008571 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008572#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008573
8574 /*
8575 * Capture end position and add node
8576 */
8577 if ( ret != NULL && ctxt->record_info ) {
8578 node_info.end_pos = ctxt->input->consumed +
8579 (CUR_PTR - ctxt->input->base);
8580 node_info.end_line = ctxt->input->line;
8581 node_info.node = ret;
8582 xmlParserAddNodeInfo(ctxt, &node_info);
8583 }
8584}
8585
8586/**
8587 * xmlParseVersionNum:
8588 * @ctxt: an XML parser context
8589 *
8590 * parse the XML version value.
8591 *
8592 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8593 *
8594 * Returns the string giving the XML version number, or NULL
8595 */
8596xmlChar *
8597xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8598 xmlChar *buf = NULL;
8599 int len = 0;
8600 int size = 10;
8601 xmlChar cur;
8602
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008603 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008604 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008605 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008606 return(NULL);
8607 }
8608 cur = CUR;
8609 while (((cur >= 'a') && (cur <= 'z')) ||
8610 ((cur >= 'A') && (cur <= 'Z')) ||
8611 ((cur >= '0') && (cur <= '9')) ||
8612 (cur == '_') || (cur == '.') ||
8613 (cur == ':') || (cur == '-')) {
8614 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008615 xmlChar *tmp;
8616
Owen Taylor3473f882001-02-23 17:55:21 +00008617 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008618 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8619 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008620 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008621 return(NULL);
8622 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008623 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008624 }
8625 buf[len++] = cur;
8626 NEXT;
8627 cur=CUR;
8628 }
8629 buf[len] = 0;
8630 return(buf);
8631}
8632
8633/**
8634 * xmlParseVersionInfo:
8635 * @ctxt: an XML parser context
8636 *
8637 * parse the XML version.
8638 *
8639 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8640 *
8641 * [25] Eq ::= S? '=' S?
8642 *
8643 * Returns the version string, e.g. "1.0"
8644 */
8645
8646xmlChar *
8647xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8648 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008649
Daniel Veillarda07050d2003-10-19 14:46:32 +00008650 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008651 SKIP(7);
8652 SKIP_BLANKS;
8653 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008654 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 return(NULL);
8656 }
8657 NEXT;
8658 SKIP_BLANKS;
8659 if (RAW == '"') {
8660 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008661 version = xmlParseVersionNum(ctxt);
8662 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008663 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008664 } else
8665 NEXT;
8666 } else if (RAW == '\''){
8667 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008668 version = xmlParseVersionNum(ctxt);
8669 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008670 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008671 } else
8672 NEXT;
8673 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008674 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008675 }
8676 }
8677 return(version);
8678}
8679
8680/**
8681 * xmlParseEncName:
8682 * @ctxt: an XML parser context
8683 *
8684 * parse the XML encoding name
8685 *
8686 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8687 *
8688 * Returns the encoding name value or NULL
8689 */
8690xmlChar *
8691xmlParseEncName(xmlParserCtxtPtr ctxt) {
8692 xmlChar *buf = NULL;
8693 int len = 0;
8694 int size = 10;
8695 xmlChar cur;
8696
8697 cur = CUR;
8698 if (((cur >= 'a') && (cur <= 'z')) ||
8699 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008700 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008701 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008702 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008703 return(NULL);
8704 }
8705
8706 buf[len++] = cur;
8707 NEXT;
8708 cur = CUR;
8709 while (((cur >= 'a') && (cur <= 'z')) ||
8710 ((cur >= 'A') && (cur <= 'Z')) ||
8711 ((cur >= '0') && (cur <= '9')) ||
8712 (cur == '.') || (cur == '_') ||
8713 (cur == '-')) {
8714 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008715 xmlChar *tmp;
8716
Owen Taylor3473f882001-02-23 17:55:21 +00008717 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008718 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8719 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008720 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008721 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008722 return(NULL);
8723 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008724 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008725 }
8726 buf[len++] = cur;
8727 NEXT;
8728 cur = CUR;
8729 if (cur == 0) {
8730 SHRINK;
8731 GROW;
8732 cur = CUR;
8733 }
8734 }
8735 buf[len] = 0;
8736 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008737 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008738 }
8739 return(buf);
8740}
8741
8742/**
8743 * xmlParseEncodingDecl:
8744 * @ctxt: an XML parser context
8745 *
8746 * parse the XML encoding declaration
8747 *
8748 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8749 *
8750 * this setups the conversion filters.
8751 *
8752 * Returns the encoding value or NULL
8753 */
8754
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008755const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008756xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8757 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008758
8759 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008760 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008761 SKIP(8);
8762 SKIP_BLANKS;
8763 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008764 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008765 return(NULL);
8766 }
8767 NEXT;
8768 SKIP_BLANKS;
8769 if (RAW == '"') {
8770 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008771 encoding = xmlParseEncName(ctxt);
8772 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008773 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008774 } else
8775 NEXT;
8776 } else if (RAW == '\''){
8777 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008778 encoding = xmlParseEncName(ctxt);
8779 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008780 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008781 } else
8782 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008784 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008785 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008786 /*
8787 * UTF-16 encoding stwich has already taken place at this stage,
8788 * more over the little-endian/big-endian selection is already done
8789 */
8790 if ((encoding != NULL) &&
8791 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8792 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008793 if (ctxt->encoding != NULL)
8794 xmlFree((xmlChar *) ctxt->encoding);
8795 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008796 }
8797 /*
8798 * UTF-8 encoding is handled natively
8799 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008800 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008801 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8802 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008803 if (ctxt->encoding != NULL)
8804 xmlFree((xmlChar *) ctxt->encoding);
8805 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008806 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008807 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008808 xmlCharEncodingHandlerPtr handler;
8809
8810 if (ctxt->input->encoding != NULL)
8811 xmlFree((xmlChar *) ctxt->input->encoding);
8812 ctxt->input->encoding = encoding;
8813
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008814 handler = xmlFindCharEncodingHandler((const char *) encoding);
8815 if (handler != NULL) {
8816 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008817 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008818 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008819 "Unsupported encoding %s\n", encoding);
8820 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008821 }
8822 }
8823 }
8824 return(encoding);
8825}
8826
8827/**
8828 * xmlParseSDDecl:
8829 * @ctxt: an XML parser context
8830 *
8831 * parse the XML standalone declaration
8832 *
8833 * [32] SDDecl ::= S 'standalone' Eq
8834 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8835 *
8836 * [ VC: Standalone Document Declaration ]
8837 * TODO The standalone document declaration must have the value "no"
8838 * if any external markup declarations contain declarations of:
8839 * - attributes with default values, if elements to which these
8840 * attributes apply appear in the document without specifications
8841 * of values for these attributes, or
8842 * - entities (other than amp, lt, gt, apos, quot), if references
8843 * to those entities appear in the document, or
8844 * - attributes with values subject to normalization, where the
8845 * attribute appears in the document with a value which will change
8846 * as a result of normalization, or
8847 * - element types with element content, if white space occurs directly
8848 * within any instance of those types.
8849 *
8850 * Returns 1 if standalone, 0 otherwise
8851 */
8852
8853int
8854xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8855 int standalone = -1;
8856
8857 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008858 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008859 SKIP(10);
8860 SKIP_BLANKS;
8861 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008862 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008863 return(standalone);
8864 }
8865 NEXT;
8866 SKIP_BLANKS;
8867 if (RAW == '\''){
8868 NEXT;
8869 if ((RAW == 'n') && (NXT(1) == 'o')) {
8870 standalone = 0;
8871 SKIP(2);
8872 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8873 (NXT(2) == 's')) {
8874 standalone = 1;
8875 SKIP(3);
8876 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008877 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008878 }
8879 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008880 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008881 } else
8882 NEXT;
8883 } else if (RAW == '"'){
8884 NEXT;
8885 if ((RAW == 'n') && (NXT(1) == 'o')) {
8886 standalone = 0;
8887 SKIP(2);
8888 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8889 (NXT(2) == 's')) {
8890 standalone = 1;
8891 SKIP(3);
8892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008893 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008894 }
8895 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008896 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008897 } else
8898 NEXT;
8899 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008900 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008901 }
8902 }
8903 return(standalone);
8904}
8905
8906/**
8907 * xmlParseXMLDecl:
8908 * @ctxt: an XML parser context
8909 *
8910 * parse an XML declaration header
8911 *
8912 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8913 */
8914
8915void
8916xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8917 xmlChar *version;
8918
8919 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00008920 * This value for standalone indicates that the document has an
8921 * XML declaration but it does not have a standalone attribute.
8922 * It will be overwritten later if a standalone attribute is found.
8923 */
8924 ctxt->input->standalone = -2;
8925
8926 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008927 * We know that '<?xml' is here.
8928 */
8929 SKIP(5);
8930
William M. Brack76e95df2003-10-18 16:20:14 +00008931 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008932 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8933 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008934 }
8935 SKIP_BLANKS;
8936
8937 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008938 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008939 */
8940 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008941 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008942 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008943 } else {
8944 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8945 /*
8946 * TODO: Blueberry should be detected here
8947 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008948 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8949 "Unsupported version '%s'\n",
8950 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008951 }
8952 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008953 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008954 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008955 }
Owen Taylor3473f882001-02-23 17:55:21 +00008956
8957 /*
8958 * We may have the encoding declaration
8959 */
William M. Brack76e95df2003-10-18 16:20:14 +00008960 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008961 if ((RAW == '?') && (NXT(1) == '>')) {
8962 SKIP(2);
8963 return;
8964 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008965 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008966 }
8967 xmlParseEncodingDecl(ctxt);
8968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8969 /*
8970 * The XML REC instructs us to stop parsing right here
8971 */
8972 return;
8973 }
8974
8975 /*
8976 * We may have the standalone status.
8977 */
William M. Brack76e95df2003-10-18 16:20:14 +00008978 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008979 if ((RAW == '?') && (NXT(1) == '>')) {
8980 SKIP(2);
8981 return;
8982 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008983 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008984 }
8985 SKIP_BLANKS;
8986 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8987
8988 SKIP_BLANKS;
8989 if ((RAW == '?') && (NXT(1) == '>')) {
8990 SKIP(2);
8991 } else if (RAW == '>') {
8992 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008993 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008994 NEXT;
8995 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008996 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008997 MOVETO_ENDTAG(CUR_PTR);
8998 NEXT;
8999 }
9000}
9001
9002/**
9003 * xmlParseMisc:
9004 * @ctxt: an XML parser context
9005 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009006 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009007 *
9008 * [27] Misc ::= Comment | PI | S
9009 */
9010
9011void
9012xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009013 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009014 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009015 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009016 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009017 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009018 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009019 NEXT;
9020 } else
9021 xmlParseComment(ctxt);
9022 }
9023}
9024
9025/**
9026 * xmlParseDocument:
9027 * @ctxt: an XML parser context
9028 *
9029 * parse an XML document (and build a tree if using the standard SAX
9030 * interface).
9031 *
9032 * [1] document ::= prolog element Misc*
9033 *
9034 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9035 *
9036 * Returns 0, -1 in case of error. the parser context is augmented
9037 * as a result of the parsing.
9038 */
9039
9040int
9041xmlParseDocument(xmlParserCtxtPtr ctxt) {
9042 xmlChar start[4];
9043 xmlCharEncoding enc;
9044
9045 xmlInitParser();
9046
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009047 if ((ctxt == NULL) || (ctxt->input == NULL))
9048 return(-1);
9049
Owen Taylor3473f882001-02-23 17:55:21 +00009050 GROW;
9051
9052 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009053 * SAX: detecting the level.
9054 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009055 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009056
9057 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009058 * SAX: beginning of the document processing.
9059 */
9060 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9061 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9062
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009063 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9064 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009065 /*
9066 * Get the 4 first bytes and decode the charset
9067 * if enc != XML_CHAR_ENCODING_NONE
9068 * plug some encoding conversion routines.
9069 */
9070 start[0] = RAW;
9071 start[1] = NXT(1);
9072 start[2] = NXT(2);
9073 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009074 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009075 if (enc != XML_CHAR_ENCODING_NONE) {
9076 xmlSwitchEncoding(ctxt, enc);
9077 }
Owen Taylor3473f882001-02-23 17:55:21 +00009078 }
9079
9080
9081 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009082 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009083 }
9084
9085 /*
9086 * Check for the XMLDecl in the Prolog.
9087 */
9088 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009089 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009090
9091 /*
9092 * Note that we will switch encoding on the fly.
9093 */
9094 xmlParseXMLDecl(ctxt);
9095 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9096 /*
9097 * The XML REC instructs us to stop parsing right here
9098 */
9099 return(-1);
9100 }
9101 ctxt->standalone = ctxt->input->standalone;
9102 SKIP_BLANKS;
9103 } else {
9104 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9105 }
9106 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9107 ctxt->sax->startDocument(ctxt->userData);
9108
9109 /*
9110 * The Misc part of the Prolog
9111 */
9112 GROW;
9113 xmlParseMisc(ctxt);
9114
9115 /*
9116 * Then possibly doc type declaration(s) and more Misc
9117 * (doctypedecl Misc*)?
9118 */
9119 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009120 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009121
9122 ctxt->inSubset = 1;
9123 xmlParseDocTypeDecl(ctxt);
9124 if (RAW == '[') {
9125 ctxt->instate = XML_PARSER_DTD;
9126 xmlParseInternalSubset(ctxt);
9127 }
9128
9129 /*
9130 * Create and update the external subset.
9131 */
9132 ctxt->inSubset = 2;
9133 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9134 (!ctxt->disableSAX))
9135 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9136 ctxt->extSubSystem, ctxt->extSubURI);
9137 ctxt->inSubset = 0;
9138
9139
9140 ctxt->instate = XML_PARSER_PROLOG;
9141 xmlParseMisc(ctxt);
9142 }
9143
9144 /*
9145 * Time to start parsing the tree itself
9146 */
9147 GROW;
9148 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009149 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9150 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009151 } else {
9152 ctxt->instate = XML_PARSER_CONTENT;
9153 xmlParseElement(ctxt);
9154 ctxt->instate = XML_PARSER_EPILOG;
9155
9156
9157 /*
9158 * The Misc part at the end
9159 */
9160 xmlParseMisc(ctxt);
9161
Daniel Veillard561b7f82002-03-20 21:55:57 +00009162 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009163 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009164 }
9165 ctxt->instate = XML_PARSER_EOF;
9166 }
9167
9168 /*
9169 * SAX: end of the document processing.
9170 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009171 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009172 ctxt->sax->endDocument(ctxt->userData);
9173
Daniel Veillard5997aca2002-03-18 18:36:20 +00009174 /*
9175 * Remove locally kept entity definitions if the tree was not built
9176 */
9177 if ((ctxt->myDoc != NULL) &&
9178 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9179 xmlFreeDoc(ctxt->myDoc);
9180 ctxt->myDoc = NULL;
9181 }
9182
Daniel Veillardc7612992002-02-17 22:47:37 +00009183 if (! ctxt->wellFormed) {
9184 ctxt->valid = 0;
9185 return(-1);
9186 }
Owen Taylor3473f882001-02-23 17:55:21 +00009187 return(0);
9188}
9189
9190/**
9191 * xmlParseExtParsedEnt:
9192 * @ctxt: an XML parser context
9193 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009194 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009195 * An external general parsed entity is well-formed if it matches the
9196 * production labeled extParsedEnt.
9197 *
9198 * [78] extParsedEnt ::= TextDecl? content
9199 *
9200 * Returns 0, -1 in case of error. the parser context is augmented
9201 * as a result of the parsing.
9202 */
9203
9204int
9205xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9206 xmlChar start[4];
9207 xmlCharEncoding enc;
9208
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009209 if ((ctxt == NULL) || (ctxt->input == NULL))
9210 return(-1);
9211
Owen Taylor3473f882001-02-23 17:55:21 +00009212 xmlDefaultSAXHandlerInit();
9213
Daniel Veillard309f81d2003-09-23 09:02:53 +00009214 xmlDetectSAX2(ctxt);
9215
Owen Taylor3473f882001-02-23 17:55:21 +00009216 GROW;
9217
9218 /*
9219 * SAX: beginning of the document processing.
9220 */
9221 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9222 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9223
9224 /*
9225 * Get the 4 first bytes and decode the charset
9226 * if enc != XML_CHAR_ENCODING_NONE
9227 * plug some encoding conversion routines.
9228 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009229 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9230 start[0] = RAW;
9231 start[1] = NXT(1);
9232 start[2] = NXT(2);
9233 start[3] = NXT(3);
9234 enc = xmlDetectCharEncoding(start, 4);
9235 if (enc != XML_CHAR_ENCODING_NONE) {
9236 xmlSwitchEncoding(ctxt, enc);
9237 }
Owen Taylor3473f882001-02-23 17:55:21 +00009238 }
9239
9240
9241 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009242 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009243 }
9244
9245 /*
9246 * Check for the XMLDecl in the Prolog.
9247 */
9248 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009249 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009250
9251 /*
9252 * Note that we will switch encoding on the fly.
9253 */
9254 xmlParseXMLDecl(ctxt);
9255 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9256 /*
9257 * The XML REC instructs us to stop parsing right here
9258 */
9259 return(-1);
9260 }
9261 SKIP_BLANKS;
9262 } else {
9263 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9264 }
9265 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9266 ctxt->sax->startDocument(ctxt->userData);
9267
9268 /*
9269 * Doing validity checking on chunk doesn't make sense
9270 */
9271 ctxt->instate = XML_PARSER_CONTENT;
9272 ctxt->validate = 0;
9273 ctxt->loadsubset = 0;
9274 ctxt->depth = 0;
9275
9276 xmlParseContent(ctxt);
9277
9278 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009279 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009280 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009281 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009282 }
9283
9284 /*
9285 * SAX: end of the document processing.
9286 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009287 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009288 ctxt->sax->endDocument(ctxt->userData);
9289
9290 if (! ctxt->wellFormed) return(-1);
9291 return(0);
9292}
9293
Daniel Veillard73b013f2003-09-30 12:36:01 +00009294#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009295/************************************************************************
9296 * *
9297 * Progressive parsing interfaces *
9298 * *
9299 ************************************************************************/
9300
9301/**
9302 * xmlParseLookupSequence:
9303 * @ctxt: an XML parser context
9304 * @first: the first char to lookup
9305 * @next: the next char to lookup or zero
9306 * @third: the next char to lookup or zero
9307 *
9308 * Try to find if a sequence (first, next, third) or just (first next) or
9309 * (first) is available in the input stream.
9310 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9311 * to avoid rescanning sequences of bytes, it DOES change the state of the
9312 * parser, do not use liberally.
9313 *
9314 * Returns the index to the current parsing point if the full sequence
9315 * is available, -1 otherwise.
9316 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009317static int
Owen Taylor3473f882001-02-23 17:55:21 +00009318xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9319 xmlChar next, xmlChar third) {
9320 int base, len;
9321 xmlParserInputPtr in;
9322 const xmlChar *buf;
9323
9324 in = ctxt->input;
9325 if (in == NULL) return(-1);
9326 base = in->cur - in->base;
9327 if (base < 0) return(-1);
9328 if (ctxt->checkIndex > base)
9329 base = ctxt->checkIndex;
9330 if (in->buf == NULL) {
9331 buf = in->base;
9332 len = in->length;
9333 } else {
9334 buf = in->buf->buffer->content;
9335 len = in->buf->buffer->use;
9336 }
9337 /* take into account the sequence length */
9338 if (third) len -= 2;
9339 else if (next) len --;
9340 for (;base < len;base++) {
9341 if (buf[base] == first) {
9342 if (third != 0) {
9343 if ((buf[base + 1] != next) ||
9344 (buf[base + 2] != third)) continue;
9345 } else if (next != 0) {
9346 if (buf[base + 1] != next) continue;
9347 }
9348 ctxt->checkIndex = 0;
9349#ifdef DEBUG_PUSH
9350 if (next == 0)
9351 xmlGenericError(xmlGenericErrorContext,
9352 "PP: lookup '%c' found at %d\n",
9353 first, base);
9354 else if (third == 0)
9355 xmlGenericError(xmlGenericErrorContext,
9356 "PP: lookup '%c%c' found at %d\n",
9357 first, next, base);
9358 else
9359 xmlGenericError(xmlGenericErrorContext,
9360 "PP: lookup '%c%c%c' found at %d\n",
9361 first, next, third, base);
9362#endif
9363 return(base - (in->cur - in->base));
9364 }
9365 }
9366 ctxt->checkIndex = base;
9367#ifdef DEBUG_PUSH
9368 if (next == 0)
9369 xmlGenericError(xmlGenericErrorContext,
9370 "PP: lookup '%c' failed\n", first);
9371 else if (third == 0)
9372 xmlGenericError(xmlGenericErrorContext,
9373 "PP: lookup '%c%c' failed\n", first, next);
9374 else
9375 xmlGenericError(xmlGenericErrorContext,
9376 "PP: lookup '%c%c%c' failed\n", first, next, third);
9377#endif
9378 return(-1);
9379}
9380
9381/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009382 * xmlParseGetLasts:
9383 * @ctxt: an XML parser context
9384 * @lastlt: pointer to store the last '<' from the input
9385 * @lastgt: pointer to store the last '>' from the input
9386 *
9387 * Lookup the last < and > in the current chunk
9388 */
9389static void
9390xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9391 const xmlChar **lastgt) {
9392 const xmlChar *tmp;
9393
9394 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9395 xmlGenericError(xmlGenericErrorContext,
9396 "Internal error: xmlParseGetLasts\n");
9397 return;
9398 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009399 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009400 tmp = ctxt->input->end;
9401 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009402 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009403 if (tmp < ctxt->input->base) {
9404 *lastlt = NULL;
9405 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009406 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009407 *lastlt = tmp;
9408 tmp++;
9409 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9410 if (*tmp == '\'') {
9411 tmp++;
9412 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9413 if (tmp < ctxt->input->end) tmp++;
9414 } else if (*tmp == '"') {
9415 tmp++;
9416 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9417 if (tmp < ctxt->input->end) tmp++;
9418 } else
9419 tmp++;
9420 }
9421 if (tmp < ctxt->input->end)
9422 *lastgt = tmp;
9423 else {
9424 tmp = *lastlt;
9425 tmp--;
9426 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9427 if (tmp >= ctxt->input->base)
9428 *lastgt = tmp;
9429 else
9430 *lastgt = NULL;
9431 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009432 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009433 } else {
9434 *lastlt = NULL;
9435 *lastgt = NULL;
9436 }
9437}
9438/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009439 * xmlCheckCdataPush:
9440 * @cur: pointer to the bock of characters
9441 * @len: length of the block in bytes
9442 *
9443 * Check that the block of characters is okay as SCdata content [20]
9444 *
9445 * Returns the number of bytes to pass if okay, a negative index where an
9446 * UTF-8 error occured otherwise
9447 */
9448static int
9449xmlCheckCdataPush(const xmlChar *utf, int len) {
9450 int ix;
9451 unsigned char c;
9452 int codepoint;
9453
9454 if ((utf == NULL) || (len <= 0))
9455 return(0);
9456
9457 for (ix = 0; ix < len;) { /* string is 0-terminated */
9458 c = utf[ix];
9459 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9460 if (c >= 0x20)
9461 ix++;
9462 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9463 ix++;
9464 else
9465 return(-ix);
9466 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9467 if (ix + 2 > len) return(ix);
9468 if ((utf[ix+1] & 0xc0 ) != 0x80)
9469 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009470 codepoint = (utf[ix] & 0x1f) << 6;
9471 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009472 if (!xmlIsCharQ(codepoint))
9473 return(-ix);
9474 ix += 2;
9475 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9476 if (ix + 3 > len) return(ix);
9477 if (((utf[ix+1] & 0xc0) != 0x80) ||
9478 ((utf[ix+2] & 0xc0) != 0x80))
9479 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009480 codepoint = (utf[ix] & 0xf) << 12;
9481 codepoint |= (utf[ix+1] & 0x3f) << 6;
9482 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009483 if (!xmlIsCharQ(codepoint))
9484 return(-ix);
9485 ix += 3;
9486 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9487 if (ix + 4 > len) return(ix);
9488 if (((utf[ix+1] & 0xc0) != 0x80) ||
9489 ((utf[ix+2] & 0xc0) != 0x80) ||
9490 ((utf[ix+3] & 0xc0) != 0x80))
9491 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009492 codepoint = (utf[ix] & 0x7) << 18;
9493 codepoint |= (utf[ix+1] & 0x3f) << 12;
9494 codepoint |= (utf[ix+2] & 0x3f) << 6;
9495 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009496 if (!xmlIsCharQ(codepoint))
9497 return(-ix);
9498 ix += 4;
9499 } else /* unknown encoding */
9500 return(-ix);
9501 }
9502 return(ix);
9503}
9504
9505/**
Owen Taylor3473f882001-02-23 17:55:21 +00009506 * xmlParseTryOrFinish:
9507 * @ctxt: an XML parser context
9508 * @terminate: last chunk indicator
9509 *
9510 * Try to progress on parsing
9511 *
9512 * Returns zero if no parsing was possible
9513 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009514static int
Owen Taylor3473f882001-02-23 17:55:21 +00009515xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9516 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009517 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009518 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009519 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009520
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009521 if (ctxt->input == NULL)
9522 return(0);
9523
Owen Taylor3473f882001-02-23 17:55:21 +00009524#ifdef DEBUG_PUSH
9525 switch (ctxt->instate) {
9526 case XML_PARSER_EOF:
9527 xmlGenericError(xmlGenericErrorContext,
9528 "PP: try EOF\n"); break;
9529 case XML_PARSER_START:
9530 xmlGenericError(xmlGenericErrorContext,
9531 "PP: try START\n"); break;
9532 case XML_PARSER_MISC:
9533 xmlGenericError(xmlGenericErrorContext,
9534 "PP: try MISC\n");break;
9535 case XML_PARSER_COMMENT:
9536 xmlGenericError(xmlGenericErrorContext,
9537 "PP: try COMMENT\n");break;
9538 case XML_PARSER_PROLOG:
9539 xmlGenericError(xmlGenericErrorContext,
9540 "PP: try PROLOG\n");break;
9541 case XML_PARSER_START_TAG:
9542 xmlGenericError(xmlGenericErrorContext,
9543 "PP: try START_TAG\n");break;
9544 case XML_PARSER_CONTENT:
9545 xmlGenericError(xmlGenericErrorContext,
9546 "PP: try CONTENT\n");break;
9547 case XML_PARSER_CDATA_SECTION:
9548 xmlGenericError(xmlGenericErrorContext,
9549 "PP: try CDATA_SECTION\n");break;
9550 case XML_PARSER_END_TAG:
9551 xmlGenericError(xmlGenericErrorContext,
9552 "PP: try END_TAG\n");break;
9553 case XML_PARSER_ENTITY_DECL:
9554 xmlGenericError(xmlGenericErrorContext,
9555 "PP: try ENTITY_DECL\n");break;
9556 case XML_PARSER_ENTITY_VALUE:
9557 xmlGenericError(xmlGenericErrorContext,
9558 "PP: try ENTITY_VALUE\n");break;
9559 case XML_PARSER_ATTRIBUTE_VALUE:
9560 xmlGenericError(xmlGenericErrorContext,
9561 "PP: try ATTRIBUTE_VALUE\n");break;
9562 case XML_PARSER_DTD:
9563 xmlGenericError(xmlGenericErrorContext,
9564 "PP: try DTD\n");break;
9565 case XML_PARSER_EPILOG:
9566 xmlGenericError(xmlGenericErrorContext,
9567 "PP: try EPILOG\n");break;
9568 case XML_PARSER_PI:
9569 xmlGenericError(xmlGenericErrorContext,
9570 "PP: try PI\n");break;
9571 case XML_PARSER_IGNORE:
9572 xmlGenericError(xmlGenericErrorContext,
9573 "PP: try IGNORE\n");break;
9574 }
9575#endif
9576
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009577 if ((ctxt->input != NULL) &&
9578 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009579 xmlSHRINK(ctxt);
9580 ctxt->checkIndex = 0;
9581 }
9582 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009583
Daniel Veillarda880b122003-04-21 21:36:41 +00009584 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009585 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009586 return(0);
9587
9588
Owen Taylor3473f882001-02-23 17:55:21 +00009589 /*
9590 * Pop-up of finished entities.
9591 */
9592 while ((RAW == 0) && (ctxt->inputNr > 1))
9593 xmlPopInput(ctxt);
9594
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009595 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009596 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009597 avail = ctxt->input->length -
9598 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009599 else {
9600 /*
9601 * If we are operating on converted input, try to flush
9602 * remainng chars to avoid them stalling in the non-converted
9603 * buffer.
9604 */
9605 if ((ctxt->input->buf->raw != NULL) &&
9606 (ctxt->input->buf->raw->use > 0)) {
9607 int base = ctxt->input->base -
9608 ctxt->input->buf->buffer->content;
9609 int current = ctxt->input->cur - ctxt->input->base;
9610
9611 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9612 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9613 ctxt->input->cur = ctxt->input->base + current;
9614 ctxt->input->end =
9615 &ctxt->input->buf->buffer->content[
9616 ctxt->input->buf->buffer->use];
9617 }
9618 avail = ctxt->input->buf->buffer->use -
9619 (ctxt->input->cur - ctxt->input->base);
9620 }
Owen Taylor3473f882001-02-23 17:55:21 +00009621 if (avail < 1)
9622 goto done;
9623 switch (ctxt->instate) {
9624 case XML_PARSER_EOF:
9625 /*
9626 * Document parsing is done !
9627 */
9628 goto done;
9629 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009630 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9631 xmlChar start[4];
9632 xmlCharEncoding enc;
9633
9634 /*
9635 * Very first chars read from the document flow.
9636 */
9637 if (avail < 4)
9638 goto done;
9639
9640 /*
9641 * Get the 4 first bytes and decode the charset
9642 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009643 * plug some encoding conversion routines,
9644 * else xmlSwitchEncoding will set to (default)
9645 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009646 */
9647 start[0] = RAW;
9648 start[1] = NXT(1);
9649 start[2] = NXT(2);
9650 start[3] = NXT(3);
9651 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009652 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009653 break;
9654 }
Owen Taylor3473f882001-02-23 17:55:21 +00009655
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009656 if (avail < 2)
9657 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009658 cur = ctxt->input->cur[0];
9659 next = ctxt->input->cur[1];
9660 if (cur == 0) {
9661 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9662 ctxt->sax->setDocumentLocator(ctxt->userData,
9663 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009664 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009665 ctxt->instate = XML_PARSER_EOF;
9666#ifdef DEBUG_PUSH
9667 xmlGenericError(xmlGenericErrorContext,
9668 "PP: entering EOF\n");
9669#endif
9670 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9671 ctxt->sax->endDocument(ctxt->userData);
9672 goto done;
9673 }
9674 if ((cur == '<') && (next == '?')) {
9675 /* PI or XML decl */
9676 if (avail < 5) return(ret);
9677 if ((!terminate) &&
9678 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9679 return(ret);
9680 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9681 ctxt->sax->setDocumentLocator(ctxt->userData,
9682 &xmlDefaultSAXLocator);
9683 if ((ctxt->input->cur[2] == 'x') &&
9684 (ctxt->input->cur[3] == 'm') &&
9685 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009686 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009687 ret += 5;
9688#ifdef DEBUG_PUSH
9689 xmlGenericError(xmlGenericErrorContext,
9690 "PP: Parsing XML Decl\n");
9691#endif
9692 xmlParseXMLDecl(ctxt);
9693 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9694 /*
9695 * The XML REC instructs us to stop parsing right
9696 * here
9697 */
9698 ctxt->instate = XML_PARSER_EOF;
9699 return(0);
9700 }
9701 ctxt->standalone = ctxt->input->standalone;
9702 if ((ctxt->encoding == NULL) &&
9703 (ctxt->input->encoding != NULL))
9704 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9705 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9706 (!ctxt->disableSAX))
9707 ctxt->sax->startDocument(ctxt->userData);
9708 ctxt->instate = XML_PARSER_MISC;
9709#ifdef DEBUG_PUSH
9710 xmlGenericError(xmlGenericErrorContext,
9711 "PP: entering MISC\n");
9712#endif
9713 } else {
9714 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9715 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9716 (!ctxt->disableSAX))
9717 ctxt->sax->startDocument(ctxt->userData);
9718 ctxt->instate = XML_PARSER_MISC;
9719#ifdef DEBUG_PUSH
9720 xmlGenericError(xmlGenericErrorContext,
9721 "PP: entering MISC\n");
9722#endif
9723 }
9724 } else {
9725 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9726 ctxt->sax->setDocumentLocator(ctxt->userData,
9727 &xmlDefaultSAXLocator);
9728 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009729 if (ctxt->version == NULL) {
9730 xmlErrMemory(ctxt, NULL);
9731 break;
9732 }
Owen Taylor3473f882001-02-23 17:55:21 +00009733 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9734 (!ctxt->disableSAX))
9735 ctxt->sax->startDocument(ctxt->userData);
9736 ctxt->instate = XML_PARSER_MISC;
9737#ifdef DEBUG_PUSH
9738 xmlGenericError(xmlGenericErrorContext,
9739 "PP: entering MISC\n");
9740#endif
9741 }
9742 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009743 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009744 const xmlChar *name;
9745 const xmlChar *prefix;
9746 const xmlChar *URI;
9747 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009748
9749 if ((avail < 2) && (ctxt->inputNr == 1))
9750 goto done;
9751 cur = ctxt->input->cur[0];
9752 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009753 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009754 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009755 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9756 ctxt->sax->endDocument(ctxt->userData);
9757 goto done;
9758 }
9759 if (!terminate) {
9760 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009761 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009762 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009763 goto done;
9764 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9765 goto done;
9766 }
9767 }
9768 if (ctxt->spaceNr == 0)
9769 spacePush(ctxt, -1);
9770 else
9771 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009772#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009773 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009774#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009775 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009776#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009777 else
9778 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009779#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009780 if (name == NULL) {
9781 spacePop(ctxt);
9782 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009783 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9784 ctxt->sax->endDocument(ctxt->userData);
9785 goto done;
9786 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009787#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009788 /*
9789 * [ VC: Root Element Type ]
9790 * The Name in the document type declaration must match
9791 * the element type of the root element.
9792 */
9793 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9794 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9795 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009796#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009797
9798 /*
9799 * Check for an Empty Element.
9800 */
9801 if ((RAW == '/') && (NXT(1) == '>')) {
9802 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009803
9804 if (ctxt->sax2) {
9805 if ((ctxt->sax != NULL) &&
9806 (ctxt->sax->endElementNs != NULL) &&
9807 (!ctxt->disableSAX))
9808 ctxt->sax->endElementNs(ctxt->userData, name,
9809 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009810 if (ctxt->nsNr - nsNr > 0)
9811 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009812#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009813 } else {
9814 if ((ctxt->sax != NULL) &&
9815 (ctxt->sax->endElement != NULL) &&
9816 (!ctxt->disableSAX))
9817 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009818#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009820 spacePop(ctxt);
9821 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009822 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009823 } else {
9824 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009825 }
9826 break;
9827 }
9828 if (RAW == '>') {
9829 NEXT;
9830 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009831 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009832 "Couldn't find end of Start Tag %s\n",
9833 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009834 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009835 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009836 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009837 if (ctxt->sax2)
9838 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009839#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009840 else
9841 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009842#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009843
Daniel Veillarda880b122003-04-21 21:36:41 +00009844 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009845 break;
9846 }
9847 case XML_PARSER_CONTENT: {
9848 const xmlChar *test;
9849 unsigned int cons;
9850 if ((avail < 2) && (ctxt->inputNr == 1))
9851 goto done;
9852 cur = ctxt->input->cur[0];
9853 next = ctxt->input->cur[1];
9854
9855 test = CUR_PTR;
9856 cons = ctxt->input->consumed;
9857 if ((cur == '<') && (next == '/')) {
9858 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009859 break;
9860 } else if ((cur == '<') && (next == '?')) {
9861 if ((!terminate) &&
9862 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9863 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009864 xmlParsePI(ctxt);
9865 } else if ((cur == '<') && (next != '!')) {
9866 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009867 break;
9868 } else if ((cur == '<') && (next == '!') &&
9869 (ctxt->input->cur[2] == '-') &&
9870 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009871 int term;
9872
9873 if (avail < 4)
9874 goto done;
9875 ctxt->input->cur += 4;
9876 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9877 ctxt->input->cur -= 4;
9878 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009879 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009880 xmlParseComment(ctxt);
9881 ctxt->instate = XML_PARSER_CONTENT;
9882 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9883 (ctxt->input->cur[2] == '[') &&
9884 (ctxt->input->cur[3] == 'C') &&
9885 (ctxt->input->cur[4] == 'D') &&
9886 (ctxt->input->cur[5] == 'A') &&
9887 (ctxt->input->cur[6] == 'T') &&
9888 (ctxt->input->cur[7] == 'A') &&
9889 (ctxt->input->cur[8] == '[')) {
9890 SKIP(9);
9891 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009892 break;
9893 } else if ((cur == '<') && (next == '!') &&
9894 (avail < 9)) {
9895 goto done;
9896 } else if (cur == '&') {
9897 if ((!terminate) &&
9898 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9899 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009900 xmlParseReference(ctxt);
9901 } else {
9902 /* TODO Avoid the extra copy, handle directly !!! */
9903 /*
9904 * Goal of the following test is:
9905 * - minimize calls to the SAX 'character' callback
9906 * when they are mergeable
9907 * - handle an problem for isBlank when we only parse
9908 * a sequence of blank chars and the next one is
9909 * not available to check against '<' presence.
9910 * - tries to homogenize the differences in SAX
9911 * callbacks between the push and pull versions
9912 * of the parser.
9913 */
9914 if ((ctxt->inputNr == 1) &&
9915 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9916 if (!terminate) {
9917 if (ctxt->progressive) {
9918 if ((lastlt == NULL) ||
9919 (ctxt->input->cur > lastlt))
9920 goto done;
9921 } else if (xmlParseLookupSequence(ctxt,
9922 '<', 0, 0) < 0) {
9923 goto done;
9924 }
9925 }
9926 }
9927 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009928 xmlParseCharData(ctxt, 0);
9929 }
9930 /*
9931 * Pop-up of finished entities.
9932 */
9933 while ((RAW == 0) && (ctxt->inputNr > 1))
9934 xmlPopInput(ctxt);
9935 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009936 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9937 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009938 ctxt->instate = XML_PARSER_EOF;
9939 break;
9940 }
9941 break;
9942 }
9943 case XML_PARSER_END_TAG:
9944 if (avail < 2)
9945 goto done;
9946 if (!terminate) {
9947 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009948 /* > can be found unescaped in attribute values */
9949 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009950 goto done;
9951 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9952 goto done;
9953 }
9954 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009955 if (ctxt->sax2) {
9956 xmlParseEndTag2(ctxt,
9957 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9958 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009959 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009960 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009961 }
9962#ifdef LIBXML_SAX1_ENABLED
9963 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009964 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009965#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009966 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009967 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009968 } else {
9969 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009970 }
9971 break;
9972 case XML_PARSER_CDATA_SECTION: {
9973 /*
9974 * The Push mode need to have the SAX callback for
9975 * cdataBlock merge back contiguous callbacks.
9976 */
9977 int base;
9978
9979 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9980 if (base < 0) {
9981 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009982 int tmp;
9983
9984 tmp = xmlCheckCdataPush(ctxt->input->cur,
9985 XML_PARSER_BIG_BUFFER_SIZE);
9986 if (tmp < 0) {
9987 tmp = -tmp;
9988 ctxt->input->cur += tmp;
9989 goto encoding_error;
9990 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009991 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9992 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009993 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009994 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009995 else if (ctxt->sax->characters != NULL)
9996 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009997 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009998 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009999 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010000 ctxt->checkIndex = 0;
10001 }
10002 goto done;
10003 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010004 int tmp;
10005
10006 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10007 if ((tmp < 0) || (tmp != base)) {
10008 tmp = -tmp;
10009 ctxt->input->cur += tmp;
10010 goto encoding_error;
10011 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010012 if ((ctxt->sax != NULL) && (base > 0) &&
10013 (!ctxt->disableSAX)) {
10014 if (ctxt->sax->cdataBlock != NULL)
10015 ctxt->sax->cdataBlock(ctxt->userData,
10016 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010017 else if (ctxt->sax->characters != NULL)
10018 ctxt->sax->characters(ctxt->userData,
10019 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010020 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010021 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010022 ctxt->checkIndex = 0;
10023 ctxt->instate = XML_PARSER_CONTENT;
10024#ifdef DEBUG_PUSH
10025 xmlGenericError(xmlGenericErrorContext,
10026 "PP: entering CONTENT\n");
10027#endif
10028 }
10029 break;
10030 }
Owen Taylor3473f882001-02-23 17:55:21 +000010031 case XML_PARSER_MISC:
10032 SKIP_BLANKS;
10033 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010034 avail = ctxt->input->length -
10035 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010036 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010037 avail = ctxt->input->buf->buffer->use -
10038 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010039 if (avail < 2)
10040 goto done;
10041 cur = ctxt->input->cur[0];
10042 next = ctxt->input->cur[1];
10043 if ((cur == '<') && (next == '?')) {
10044 if ((!terminate) &&
10045 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10046 goto done;
10047#ifdef DEBUG_PUSH
10048 xmlGenericError(xmlGenericErrorContext,
10049 "PP: Parsing PI\n");
10050#endif
10051 xmlParsePI(ctxt);
10052 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010053 (ctxt->input->cur[2] == '-') &&
10054 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010055 if ((!terminate) &&
10056 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10057 goto done;
10058#ifdef DEBUG_PUSH
10059 xmlGenericError(xmlGenericErrorContext,
10060 "PP: Parsing Comment\n");
10061#endif
10062 xmlParseComment(ctxt);
10063 ctxt->instate = XML_PARSER_MISC;
10064 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010065 (ctxt->input->cur[2] == 'D') &&
10066 (ctxt->input->cur[3] == 'O') &&
10067 (ctxt->input->cur[4] == 'C') &&
10068 (ctxt->input->cur[5] == 'T') &&
10069 (ctxt->input->cur[6] == 'Y') &&
10070 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010071 (ctxt->input->cur[8] == 'E')) {
10072 if ((!terminate) &&
10073 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10074 goto done;
10075#ifdef DEBUG_PUSH
10076 xmlGenericError(xmlGenericErrorContext,
10077 "PP: Parsing internal subset\n");
10078#endif
10079 ctxt->inSubset = 1;
10080 xmlParseDocTypeDecl(ctxt);
10081 if (RAW == '[') {
10082 ctxt->instate = XML_PARSER_DTD;
10083#ifdef DEBUG_PUSH
10084 xmlGenericError(xmlGenericErrorContext,
10085 "PP: entering DTD\n");
10086#endif
10087 } else {
10088 /*
10089 * Create and update the external subset.
10090 */
10091 ctxt->inSubset = 2;
10092 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10093 (ctxt->sax->externalSubset != NULL))
10094 ctxt->sax->externalSubset(ctxt->userData,
10095 ctxt->intSubName, ctxt->extSubSystem,
10096 ctxt->extSubURI);
10097 ctxt->inSubset = 0;
10098 ctxt->instate = XML_PARSER_PROLOG;
10099#ifdef DEBUG_PUSH
10100 xmlGenericError(xmlGenericErrorContext,
10101 "PP: entering PROLOG\n");
10102#endif
10103 }
10104 } else if ((cur == '<') && (next == '!') &&
10105 (avail < 9)) {
10106 goto done;
10107 } else {
10108 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010109 ctxt->progressive = 1;
10110 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010111#ifdef DEBUG_PUSH
10112 xmlGenericError(xmlGenericErrorContext,
10113 "PP: entering START_TAG\n");
10114#endif
10115 }
10116 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010117 case XML_PARSER_PROLOG:
10118 SKIP_BLANKS;
10119 if (ctxt->input->buf == NULL)
10120 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10121 else
10122 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10123 if (avail < 2)
10124 goto done;
10125 cur = ctxt->input->cur[0];
10126 next = ctxt->input->cur[1];
10127 if ((cur == '<') && (next == '?')) {
10128 if ((!terminate) &&
10129 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10130 goto done;
10131#ifdef DEBUG_PUSH
10132 xmlGenericError(xmlGenericErrorContext,
10133 "PP: Parsing PI\n");
10134#endif
10135 xmlParsePI(ctxt);
10136 } else if ((cur == '<') && (next == '!') &&
10137 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10138 if ((!terminate) &&
10139 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10140 goto done;
10141#ifdef DEBUG_PUSH
10142 xmlGenericError(xmlGenericErrorContext,
10143 "PP: Parsing Comment\n");
10144#endif
10145 xmlParseComment(ctxt);
10146 ctxt->instate = XML_PARSER_PROLOG;
10147 } else if ((cur == '<') && (next == '!') &&
10148 (avail < 4)) {
10149 goto done;
10150 } else {
10151 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010152 if (ctxt->progressive == 0)
10153 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010154 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010155#ifdef DEBUG_PUSH
10156 xmlGenericError(xmlGenericErrorContext,
10157 "PP: entering START_TAG\n");
10158#endif
10159 }
10160 break;
10161 case XML_PARSER_EPILOG:
10162 SKIP_BLANKS;
10163 if (ctxt->input->buf == NULL)
10164 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10165 else
10166 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10167 if (avail < 2)
10168 goto done;
10169 cur = ctxt->input->cur[0];
10170 next = ctxt->input->cur[1];
10171 if ((cur == '<') && (next == '?')) {
10172 if ((!terminate) &&
10173 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10174 goto done;
10175#ifdef DEBUG_PUSH
10176 xmlGenericError(xmlGenericErrorContext,
10177 "PP: Parsing PI\n");
10178#endif
10179 xmlParsePI(ctxt);
10180 ctxt->instate = XML_PARSER_EPILOG;
10181 } else if ((cur == '<') && (next == '!') &&
10182 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10183 if ((!terminate) &&
10184 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10185 goto done;
10186#ifdef DEBUG_PUSH
10187 xmlGenericError(xmlGenericErrorContext,
10188 "PP: Parsing Comment\n");
10189#endif
10190 xmlParseComment(ctxt);
10191 ctxt->instate = XML_PARSER_EPILOG;
10192 } else if ((cur == '<') && (next == '!') &&
10193 (avail < 4)) {
10194 goto done;
10195 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010196 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010197 ctxt->instate = XML_PARSER_EOF;
10198#ifdef DEBUG_PUSH
10199 xmlGenericError(xmlGenericErrorContext,
10200 "PP: entering EOF\n");
10201#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010202 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010203 ctxt->sax->endDocument(ctxt->userData);
10204 goto done;
10205 }
10206 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010207 case XML_PARSER_DTD: {
10208 /*
10209 * Sorry but progressive parsing of the internal subset
10210 * is not expected to be supported. We first check that
10211 * the full content of the internal subset is available and
10212 * the parsing is launched only at that point.
10213 * Internal subset ends up with "']' S? '>'" in an unescaped
10214 * section and not in a ']]>' sequence which are conditional
10215 * sections (whoever argued to keep that crap in XML deserve
10216 * a place in hell !).
10217 */
10218 int base, i;
10219 xmlChar *buf;
10220 xmlChar quote = 0;
10221
10222 base = ctxt->input->cur - ctxt->input->base;
10223 if (base < 0) return(0);
10224 if (ctxt->checkIndex > base)
10225 base = ctxt->checkIndex;
10226 buf = ctxt->input->buf->buffer->content;
10227 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10228 base++) {
10229 if (quote != 0) {
10230 if (buf[base] == quote)
10231 quote = 0;
10232 continue;
10233 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010234 if ((quote == 0) && (buf[base] == '<')) {
10235 int found = 0;
10236 /* special handling of comments */
10237 if (((unsigned int) base + 4 <
10238 ctxt->input->buf->buffer->use) &&
10239 (buf[base + 1] == '!') &&
10240 (buf[base + 2] == '-') &&
10241 (buf[base + 3] == '-')) {
10242 for (;(unsigned int) base + 3 <
10243 ctxt->input->buf->buffer->use; base++) {
10244 if ((buf[base] == '-') &&
10245 (buf[base + 1] == '-') &&
10246 (buf[base + 2] == '>')) {
10247 found = 1;
10248 base += 2;
10249 break;
10250 }
10251 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010252 if (!found) {
10253#if 0
10254 fprintf(stderr, "unfinished comment\n");
10255#endif
10256 break; /* for */
10257 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010258 continue;
10259 }
10260 }
Owen Taylor3473f882001-02-23 17:55:21 +000010261 if (buf[base] == '"') {
10262 quote = '"';
10263 continue;
10264 }
10265 if (buf[base] == '\'') {
10266 quote = '\'';
10267 continue;
10268 }
10269 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010270#if 0
10271 fprintf(stderr, "%c%c%c%c: ", buf[base],
10272 buf[base + 1], buf[base + 2], buf[base + 3]);
10273#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010274 if ((unsigned int) base +1 >=
10275 ctxt->input->buf->buffer->use)
10276 break;
10277 if (buf[base + 1] == ']') {
10278 /* conditional crap, skip both ']' ! */
10279 base++;
10280 continue;
10281 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010282 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010283 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10284 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010285 if (buf[base + i] == '>') {
10286#if 0
10287 fprintf(stderr, "found\n");
10288#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010289 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010290 }
10291 if (!IS_BLANK_CH(buf[base + i])) {
10292#if 0
10293 fprintf(stderr, "not found\n");
10294#endif
10295 goto not_end_of_int_subset;
10296 }
Owen Taylor3473f882001-02-23 17:55:21 +000010297 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010298#if 0
10299 fprintf(stderr, "end of stream\n");
10300#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010301 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010302
Owen Taylor3473f882001-02-23 17:55:21 +000010303 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010304not_end_of_int_subset:
10305 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010306 }
10307 /*
10308 * We didn't found the end of the Internal subset
10309 */
Owen Taylor3473f882001-02-23 17:55:21 +000010310#ifdef DEBUG_PUSH
10311 if (next == 0)
10312 xmlGenericError(xmlGenericErrorContext,
10313 "PP: lookup of int subset end filed\n");
10314#endif
10315 goto done;
10316
10317found_end_int_subset:
10318 xmlParseInternalSubset(ctxt);
10319 ctxt->inSubset = 2;
10320 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10321 (ctxt->sax->externalSubset != NULL))
10322 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10323 ctxt->extSubSystem, ctxt->extSubURI);
10324 ctxt->inSubset = 0;
10325 ctxt->instate = XML_PARSER_PROLOG;
10326 ctxt->checkIndex = 0;
10327#ifdef DEBUG_PUSH
10328 xmlGenericError(xmlGenericErrorContext,
10329 "PP: entering PROLOG\n");
10330#endif
10331 break;
10332 }
10333 case XML_PARSER_COMMENT:
10334 xmlGenericError(xmlGenericErrorContext,
10335 "PP: internal error, state == COMMENT\n");
10336 ctxt->instate = XML_PARSER_CONTENT;
10337#ifdef DEBUG_PUSH
10338 xmlGenericError(xmlGenericErrorContext,
10339 "PP: entering CONTENT\n");
10340#endif
10341 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010342 case XML_PARSER_IGNORE:
10343 xmlGenericError(xmlGenericErrorContext,
10344 "PP: internal error, state == IGNORE");
10345 ctxt->instate = XML_PARSER_DTD;
10346#ifdef DEBUG_PUSH
10347 xmlGenericError(xmlGenericErrorContext,
10348 "PP: entering DTD\n");
10349#endif
10350 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010351 case XML_PARSER_PI:
10352 xmlGenericError(xmlGenericErrorContext,
10353 "PP: internal error, state == PI\n");
10354 ctxt->instate = XML_PARSER_CONTENT;
10355#ifdef DEBUG_PUSH
10356 xmlGenericError(xmlGenericErrorContext,
10357 "PP: entering CONTENT\n");
10358#endif
10359 break;
10360 case XML_PARSER_ENTITY_DECL:
10361 xmlGenericError(xmlGenericErrorContext,
10362 "PP: internal error, state == ENTITY_DECL\n");
10363 ctxt->instate = XML_PARSER_DTD;
10364#ifdef DEBUG_PUSH
10365 xmlGenericError(xmlGenericErrorContext,
10366 "PP: entering DTD\n");
10367#endif
10368 break;
10369 case XML_PARSER_ENTITY_VALUE:
10370 xmlGenericError(xmlGenericErrorContext,
10371 "PP: internal error, state == ENTITY_VALUE\n");
10372 ctxt->instate = XML_PARSER_CONTENT;
10373#ifdef DEBUG_PUSH
10374 xmlGenericError(xmlGenericErrorContext,
10375 "PP: entering DTD\n");
10376#endif
10377 break;
10378 case XML_PARSER_ATTRIBUTE_VALUE:
10379 xmlGenericError(xmlGenericErrorContext,
10380 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10381 ctxt->instate = XML_PARSER_START_TAG;
10382#ifdef DEBUG_PUSH
10383 xmlGenericError(xmlGenericErrorContext,
10384 "PP: entering START_TAG\n");
10385#endif
10386 break;
10387 case XML_PARSER_SYSTEM_LITERAL:
10388 xmlGenericError(xmlGenericErrorContext,
10389 "PP: internal error, state == SYSTEM_LITERAL\n");
10390 ctxt->instate = XML_PARSER_START_TAG;
10391#ifdef DEBUG_PUSH
10392 xmlGenericError(xmlGenericErrorContext,
10393 "PP: entering START_TAG\n");
10394#endif
10395 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010396 case XML_PARSER_PUBLIC_LITERAL:
10397 xmlGenericError(xmlGenericErrorContext,
10398 "PP: internal error, state == PUBLIC_LITERAL\n");
10399 ctxt->instate = XML_PARSER_START_TAG;
10400#ifdef DEBUG_PUSH
10401 xmlGenericError(xmlGenericErrorContext,
10402 "PP: entering START_TAG\n");
10403#endif
10404 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010405 }
10406 }
10407done:
10408#ifdef DEBUG_PUSH
10409 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10410#endif
10411 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010412encoding_error:
10413 {
10414 char buffer[150];
10415
10416 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10417 ctxt->input->cur[0], ctxt->input->cur[1],
10418 ctxt->input->cur[2], ctxt->input->cur[3]);
10419 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10420 "Input is not proper UTF-8, indicate encoding !\n%s",
10421 BAD_CAST buffer, NULL);
10422 }
10423 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010424}
10425
10426/**
Owen Taylor3473f882001-02-23 17:55:21 +000010427 * xmlParseChunk:
10428 * @ctxt: an XML parser context
10429 * @chunk: a char array
10430 * @size: the size in bytes of the chunk
10431 * @terminate: last chunk indicator
10432 *
10433 * Parse a Chunk of memory
10434 *
10435 * Returns zero if no error, the xmlParserErrors otherwise.
10436 */
10437int
10438xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10439 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010440 int end_in_lf = 0;
10441
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010442 if (ctxt == NULL)
10443 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010444 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010445 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010446 if (ctxt->instate == XML_PARSER_START)
10447 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010448 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10449 (chunk[size - 1] == '\r')) {
10450 end_in_lf = 1;
10451 size--;
10452 }
Owen Taylor3473f882001-02-23 17:55:21 +000010453 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10454 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10455 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10456 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010457 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010458
William M. Bracka3215c72004-07-31 16:24:01 +000010459 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10460 if (res < 0) {
10461 ctxt->errNo = XML_PARSER_EOF;
10462 ctxt->disableSAX = 1;
10463 return (XML_PARSER_EOF);
10464 }
Owen Taylor3473f882001-02-23 17:55:21 +000010465 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10466 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010467 ctxt->input->end =
10468 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010469#ifdef DEBUG_PUSH
10470 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10471#endif
10472
Owen Taylor3473f882001-02-23 17:55:21 +000010473 } else if (ctxt->instate != XML_PARSER_EOF) {
10474 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10475 xmlParserInputBufferPtr in = ctxt->input->buf;
10476 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10477 (in->raw != NULL)) {
10478 int nbchars;
10479
10480 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10481 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010482 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010483 xmlGenericError(xmlGenericErrorContext,
10484 "xmlParseChunk: encoder error\n");
10485 return(XML_ERR_INVALID_ENCODING);
10486 }
10487 }
10488 }
10489 }
10490 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010491 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10492 (ctxt->input->buf != NULL)) {
10493 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10494 }
Daniel Veillard14412512005-01-21 23:53:26 +000010495 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010496 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010497 if (terminate) {
10498 /*
10499 * Check for termination
10500 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010501 int avail = 0;
10502
10503 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010504 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010505 avail = ctxt->input->length -
10506 (ctxt->input->cur - ctxt->input->base);
10507 else
10508 avail = ctxt->input->buf->buffer->use -
10509 (ctxt->input->cur - ctxt->input->base);
10510 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010511
Owen Taylor3473f882001-02-23 17:55:21 +000010512 if ((ctxt->instate != XML_PARSER_EOF) &&
10513 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010514 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010515 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010516 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010517 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010518 }
Owen Taylor3473f882001-02-23 17:55:21 +000010519 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010520 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010521 ctxt->sax->endDocument(ctxt->userData);
10522 }
10523 ctxt->instate = XML_PARSER_EOF;
10524 }
10525 return((xmlParserErrors) ctxt->errNo);
10526}
10527
10528/************************************************************************
10529 * *
10530 * I/O front end functions to the parser *
10531 * *
10532 ************************************************************************/
10533
10534/**
Owen Taylor3473f882001-02-23 17:55:21 +000010535 * xmlCreatePushParserCtxt:
10536 * @sax: a SAX handler
10537 * @user_data: The user data returned on SAX callbacks
10538 * @chunk: a pointer to an array of chars
10539 * @size: number of chars in the array
10540 * @filename: an optional file name or URI
10541 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010542 * Create a parser context for using the XML parser in push mode.
10543 * If @buffer and @size are non-NULL, the data is used to detect
10544 * the encoding. The remaining characters will be parsed so they
10545 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010546 * To allow content encoding detection, @size should be >= 4
10547 * The value of @filename is used for fetching external entities
10548 * and error/warning reports.
10549 *
10550 * Returns the new parser context or NULL
10551 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010552
Owen Taylor3473f882001-02-23 17:55:21 +000010553xmlParserCtxtPtr
10554xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10555 const char *chunk, int size, const char *filename) {
10556 xmlParserCtxtPtr ctxt;
10557 xmlParserInputPtr inputStream;
10558 xmlParserInputBufferPtr buf;
10559 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10560
10561 /*
10562 * plug some encoding conversion routines
10563 */
10564 if ((chunk != NULL) && (size >= 4))
10565 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10566
10567 buf = xmlAllocParserInputBuffer(enc);
10568 if (buf == NULL) return(NULL);
10569
10570 ctxt = xmlNewParserCtxt();
10571 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010572 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010573 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010574 return(NULL);
10575 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010576 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010577 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10578 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010579 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010580 xmlFreeParserInputBuffer(buf);
10581 xmlFreeParserCtxt(ctxt);
10582 return(NULL);
10583 }
Owen Taylor3473f882001-02-23 17:55:21 +000010584 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010585#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010586 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010587#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010588 xmlFree(ctxt->sax);
10589 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10590 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010591 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010592 xmlFreeParserInputBuffer(buf);
10593 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010594 return(NULL);
10595 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010596 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10597 if (sax->initialized == XML_SAX2_MAGIC)
10598 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10599 else
10600 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010601 if (user_data != NULL)
10602 ctxt->userData = user_data;
10603 }
10604 if (filename == NULL) {
10605 ctxt->directory = NULL;
10606 } else {
10607 ctxt->directory = xmlParserGetDirectory(filename);
10608 }
10609
10610 inputStream = xmlNewInputStream(ctxt);
10611 if (inputStream == NULL) {
10612 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010613 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010614 return(NULL);
10615 }
10616
10617 if (filename == NULL)
10618 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010619 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010620 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010621 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010622 if (inputStream->filename == NULL) {
10623 xmlFreeParserCtxt(ctxt);
10624 xmlFreeParserInputBuffer(buf);
10625 return(NULL);
10626 }
10627 }
Owen Taylor3473f882001-02-23 17:55:21 +000010628 inputStream->buf = buf;
10629 inputStream->base = inputStream->buf->buffer->content;
10630 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010631 inputStream->end =
10632 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010633
10634 inputPush(ctxt, inputStream);
10635
William M. Brack3a1cd212005-02-11 14:35:54 +000010636 /*
10637 * If the caller didn't provide an initial 'chunk' for determining
10638 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10639 * that it can be automatically determined later
10640 */
10641 if ((size == 0) || (chunk == NULL)) {
10642 ctxt->charset = XML_CHAR_ENCODING_NONE;
10643 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010644 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10645 int cur = ctxt->input->cur - ctxt->input->base;
10646
Owen Taylor3473f882001-02-23 17:55:21 +000010647 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010648
10649 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10650 ctxt->input->cur = ctxt->input->base + cur;
10651 ctxt->input->end =
10652 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010653#ifdef DEBUG_PUSH
10654 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10655#endif
10656 }
10657
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010658 if (enc != XML_CHAR_ENCODING_NONE) {
10659 xmlSwitchEncoding(ctxt, enc);
10660 }
10661
Owen Taylor3473f882001-02-23 17:55:21 +000010662 return(ctxt);
10663}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010664#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010665
10666/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010667 * xmlStopParser:
10668 * @ctxt: an XML parser context
10669 *
10670 * Blocks further parser processing
10671 */
10672void
10673xmlStopParser(xmlParserCtxtPtr ctxt) {
10674 if (ctxt == NULL)
10675 return;
10676 ctxt->instate = XML_PARSER_EOF;
10677 ctxt->disableSAX = 1;
10678 if (ctxt->input != NULL) {
10679 ctxt->input->cur = BAD_CAST"";
10680 ctxt->input->base = ctxt->input->cur;
10681 }
10682}
10683
10684/**
Owen Taylor3473f882001-02-23 17:55:21 +000010685 * xmlCreateIOParserCtxt:
10686 * @sax: a SAX handler
10687 * @user_data: The user data returned on SAX callbacks
10688 * @ioread: an I/O read function
10689 * @ioclose: an I/O close function
10690 * @ioctx: an I/O handler
10691 * @enc: the charset encoding if known
10692 *
10693 * Create a parser context for using the XML parser with an existing
10694 * I/O stream
10695 *
10696 * Returns the new parser context or NULL
10697 */
10698xmlParserCtxtPtr
10699xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10700 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10701 void *ioctx, xmlCharEncoding enc) {
10702 xmlParserCtxtPtr ctxt;
10703 xmlParserInputPtr inputStream;
10704 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010705
10706 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010707
10708 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10709 if (buf == NULL) return(NULL);
10710
10711 ctxt = xmlNewParserCtxt();
10712 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010713 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010714 return(NULL);
10715 }
10716 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010717#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010718 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010719#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010720 xmlFree(ctxt->sax);
10721 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10722 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010723 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010724 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010725 return(NULL);
10726 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010727 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10728 if (sax->initialized == XML_SAX2_MAGIC)
10729 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10730 else
10731 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010732 if (user_data != NULL)
10733 ctxt->userData = user_data;
10734 }
10735
10736 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10737 if (inputStream == NULL) {
10738 xmlFreeParserCtxt(ctxt);
10739 return(NULL);
10740 }
10741 inputPush(ctxt, inputStream);
10742
10743 return(ctxt);
10744}
10745
Daniel Veillard4432df22003-09-28 18:58:27 +000010746#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010747/************************************************************************
10748 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010749 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010750 * *
10751 ************************************************************************/
10752
10753/**
10754 * xmlIOParseDTD:
10755 * @sax: the SAX handler block or NULL
10756 * @input: an Input Buffer
10757 * @enc: the charset encoding if known
10758 *
10759 * Load and parse a DTD
10760 *
10761 * Returns the resulting xmlDtdPtr or NULL in case of error.
10762 * @input will be freed at parsing end.
10763 */
10764
10765xmlDtdPtr
10766xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10767 xmlCharEncoding enc) {
10768 xmlDtdPtr ret = NULL;
10769 xmlParserCtxtPtr ctxt;
10770 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010771 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010772
10773 if (input == NULL)
10774 return(NULL);
10775
10776 ctxt = xmlNewParserCtxt();
10777 if (ctxt == NULL) {
10778 return(NULL);
10779 }
10780
10781 /*
10782 * Set-up the SAX context
10783 */
10784 if (sax != NULL) {
10785 if (ctxt->sax != NULL)
10786 xmlFree(ctxt->sax);
10787 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010788 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010789 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010790 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010791
10792 /*
10793 * generate a parser input from the I/O handler
10794 */
10795
Daniel Veillard43caefb2003-12-07 19:32:22 +000010796 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010797 if (pinput == NULL) {
10798 if (sax != NULL) ctxt->sax = NULL;
10799 xmlFreeParserCtxt(ctxt);
10800 return(NULL);
10801 }
10802
10803 /*
10804 * plug some encoding conversion routines here.
10805 */
10806 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010807 if (enc != XML_CHAR_ENCODING_NONE) {
10808 xmlSwitchEncoding(ctxt, enc);
10809 }
Owen Taylor3473f882001-02-23 17:55:21 +000010810
10811 pinput->filename = NULL;
10812 pinput->line = 1;
10813 pinput->col = 1;
10814 pinput->base = ctxt->input->cur;
10815 pinput->cur = ctxt->input->cur;
10816 pinput->free = NULL;
10817
10818 /*
10819 * let's parse that entity knowing it's an external subset.
10820 */
10821 ctxt->inSubset = 2;
10822 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10823 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10824 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010825
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010826 if ((enc == XML_CHAR_ENCODING_NONE) &&
10827 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010828 /*
10829 * Get the 4 first bytes and decode the charset
10830 * if enc != XML_CHAR_ENCODING_NONE
10831 * plug some encoding conversion routines.
10832 */
10833 start[0] = RAW;
10834 start[1] = NXT(1);
10835 start[2] = NXT(2);
10836 start[3] = NXT(3);
10837 enc = xmlDetectCharEncoding(start, 4);
10838 if (enc != XML_CHAR_ENCODING_NONE) {
10839 xmlSwitchEncoding(ctxt, enc);
10840 }
10841 }
10842
Owen Taylor3473f882001-02-23 17:55:21 +000010843 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10844
10845 if (ctxt->myDoc != NULL) {
10846 if (ctxt->wellFormed) {
10847 ret = ctxt->myDoc->extSubset;
10848 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010849 if (ret != NULL) {
10850 xmlNodePtr tmp;
10851
10852 ret->doc = NULL;
10853 tmp = ret->children;
10854 while (tmp != NULL) {
10855 tmp->doc = NULL;
10856 tmp = tmp->next;
10857 }
10858 }
Owen Taylor3473f882001-02-23 17:55:21 +000010859 } else {
10860 ret = NULL;
10861 }
10862 xmlFreeDoc(ctxt->myDoc);
10863 ctxt->myDoc = NULL;
10864 }
10865 if (sax != NULL) ctxt->sax = NULL;
10866 xmlFreeParserCtxt(ctxt);
10867
10868 return(ret);
10869}
10870
10871/**
10872 * xmlSAXParseDTD:
10873 * @sax: the SAX handler block
10874 * @ExternalID: a NAME* containing the External ID of the DTD
10875 * @SystemID: a NAME* containing the URL to the DTD
10876 *
10877 * Load and parse an external subset.
10878 *
10879 * Returns the resulting xmlDtdPtr or NULL in case of error.
10880 */
10881
10882xmlDtdPtr
10883xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10884 const xmlChar *SystemID) {
10885 xmlDtdPtr ret = NULL;
10886 xmlParserCtxtPtr ctxt;
10887 xmlParserInputPtr input = NULL;
10888 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010889 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010890
10891 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10892
10893 ctxt = xmlNewParserCtxt();
10894 if (ctxt == NULL) {
10895 return(NULL);
10896 }
10897
10898 /*
10899 * Set-up the SAX context
10900 */
10901 if (sax != NULL) {
10902 if (ctxt->sax != NULL)
10903 xmlFree(ctxt->sax);
10904 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010905 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010906 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010907
10908 /*
10909 * Canonicalise the system ID
10910 */
10911 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010912 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010913 xmlFreeParserCtxt(ctxt);
10914 return(NULL);
10915 }
Owen Taylor3473f882001-02-23 17:55:21 +000010916
10917 /*
10918 * Ask the Entity resolver to load the damn thing
10919 */
10920
10921 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010922 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010923 if (input == NULL) {
10924 if (sax != NULL) ctxt->sax = NULL;
10925 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010926 if (systemIdCanonic != NULL)
10927 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010928 return(NULL);
10929 }
10930
10931 /*
10932 * plug some encoding conversion routines here.
10933 */
10934 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010935 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10936 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10937 xmlSwitchEncoding(ctxt, enc);
10938 }
Owen Taylor3473f882001-02-23 17:55:21 +000010939
10940 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010941 input->filename = (char *) systemIdCanonic;
10942 else
10943 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010944 input->line = 1;
10945 input->col = 1;
10946 input->base = ctxt->input->cur;
10947 input->cur = ctxt->input->cur;
10948 input->free = NULL;
10949
10950 /*
10951 * let's parse that entity knowing it's an external subset.
10952 */
10953 ctxt->inSubset = 2;
10954 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10955 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10956 ExternalID, SystemID);
10957 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10958
10959 if (ctxt->myDoc != NULL) {
10960 if (ctxt->wellFormed) {
10961 ret = ctxt->myDoc->extSubset;
10962 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010963 if (ret != NULL) {
10964 xmlNodePtr tmp;
10965
10966 ret->doc = NULL;
10967 tmp = ret->children;
10968 while (tmp != NULL) {
10969 tmp->doc = NULL;
10970 tmp = tmp->next;
10971 }
10972 }
Owen Taylor3473f882001-02-23 17:55:21 +000010973 } else {
10974 ret = NULL;
10975 }
10976 xmlFreeDoc(ctxt->myDoc);
10977 ctxt->myDoc = NULL;
10978 }
10979 if (sax != NULL) ctxt->sax = NULL;
10980 xmlFreeParserCtxt(ctxt);
10981
10982 return(ret);
10983}
10984
Daniel Veillard4432df22003-09-28 18:58:27 +000010985
Owen Taylor3473f882001-02-23 17:55:21 +000010986/**
10987 * xmlParseDTD:
10988 * @ExternalID: a NAME* containing the External ID of the DTD
10989 * @SystemID: a NAME* containing the URL to the DTD
10990 *
10991 * Load and parse an external subset.
10992 *
10993 * Returns the resulting xmlDtdPtr or NULL in case of error.
10994 */
10995
10996xmlDtdPtr
10997xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10998 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10999}
Daniel Veillard4432df22003-09-28 18:58:27 +000011000#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011001
11002/************************************************************************
11003 * *
11004 * Front ends when parsing an Entity *
11005 * *
11006 ************************************************************************/
11007
11008/**
Owen Taylor3473f882001-02-23 17:55:21 +000011009 * xmlParseCtxtExternalEntity:
11010 * @ctx: the existing parsing context
11011 * @URL: the URL for the entity to load
11012 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011013 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011014 *
11015 * Parse an external general entity within an existing parsing context
11016 * An external general parsed entity is well-formed if it matches the
11017 * production labeled extParsedEnt.
11018 *
11019 * [78] extParsedEnt ::= TextDecl? content
11020 *
11021 * Returns 0 if the entity is well formed, -1 in case of args problem and
11022 * the parser error code otherwise
11023 */
11024
11025int
11026xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011027 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011028 xmlParserCtxtPtr ctxt;
11029 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011030 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011031 xmlSAXHandlerPtr oldsax = NULL;
11032 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011033 xmlChar start[4];
11034 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011035
Daniel Veillardce682bc2004-11-05 17:22:25 +000011036 if (ctx == NULL) return(-1);
11037
Owen Taylor3473f882001-02-23 17:55:21 +000011038 if (ctx->depth > 40) {
11039 return(XML_ERR_ENTITY_LOOP);
11040 }
11041
Daniel Veillardcda96922001-08-21 10:56:31 +000011042 if (lst != NULL)
11043 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011044 if ((URL == NULL) && (ID == NULL))
11045 return(-1);
11046 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11047 return(-1);
11048
11049
11050 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11051 if (ctxt == NULL) return(-1);
11052 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011053 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011054 oldsax = ctxt->sax;
11055 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011056 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011057 newDoc = xmlNewDoc(BAD_CAST "1.0");
11058 if (newDoc == NULL) {
11059 xmlFreeParserCtxt(ctxt);
11060 return(-1);
11061 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011062 if (ctx->myDoc->dict) {
11063 newDoc->dict = ctx->myDoc->dict;
11064 xmlDictReference(newDoc->dict);
11065 }
Owen Taylor3473f882001-02-23 17:55:21 +000011066 if (ctx->myDoc != NULL) {
11067 newDoc->intSubset = ctx->myDoc->intSubset;
11068 newDoc->extSubset = ctx->myDoc->extSubset;
11069 }
11070 if (ctx->myDoc->URL != NULL) {
11071 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11072 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011073 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11074 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011075 ctxt->sax = oldsax;
11076 xmlFreeParserCtxt(ctxt);
11077 newDoc->intSubset = NULL;
11078 newDoc->extSubset = NULL;
11079 xmlFreeDoc(newDoc);
11080 return(-1);
11081 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011082 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011083 nodePush(ctxt, newDoc->children);
11084 if (ctx->myDoc == NULL) {
11085 ctxt->myDoc = newDoc;
11086 } else {
11087 ctxt->myDoc = ctx->myDoc;
11088 newDoc->children->doc = ctx->myDoc;
11089 }
11090
Daniel Veillard87a764e2001-06-20 17:41:10 +000011091 /*
11092 * Get the 4 first bytes and decode the charset
11093 * if enc != XML_CHAR_ENCODING_NONE
11094 * plug some encoding conversion routines.
11095 */
11096 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011097 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11098 start[0] = RAW;
11099 start[1] = NXT(1);
11100 start[2] = NXT(2);
11101 start[3] = NXT(3);
11102 enc = xmlDetectCharEncoding(start, 4);
11103 if (enc != XML_CHAR_ENCODING_NONE) {
11104 xmlSwitchEncoding(ctxt, enc);
11105 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011106 }
11107
Owen Taylor3473f882001-02-23 17:55:21 +000011108 /*
11109 * Parse a possible text declaration first
11110 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011111 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011112 xmlParseTextDecl(ctxt);
11113 }
11114
11115 /*
11116 * Doing validity checking on chunk doesn't make sense
11117 */
11118 ctxt->instate = XML_PARSER_CONTENT;
11119 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011120 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011121 ctxt->loadsubset = ctx->loadsubset;
11122 ctxt->depth = ctx->depth + 1;
11123 ctxt->replaceEntities = ctx->replaceEntities;
11124 if (ctxt->validate) {
11125 ctxt->vctxt.error = ctx->vctxt.error;
11126 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011127 } else {
11128 ctxt->vctxt.error = NULL;
11129 ctxt->vctxt.warning = NULL;
11130 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011131 ctxt->vctxt.nodeTab = NULL;
11132 ctxt->vctxt.nodeNr = 0;
11133 ctxt->vctxt.nodeMax = 0;
11134 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011135 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11136 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011137 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11138 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11139 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011140 ctxt->dictNames = ctx->dictNames;
11141 ctxt->attsDefault = ctx->attsDefault;
11142 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011143 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011144
11145 xmlParseContent(ctxt);
11146
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011147 ctx->validate = ctxt->validate;
11148 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011149 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011150 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011151 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011152 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011153 }
11154 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011156 }
11157
11158 if (!ctxt->wellFormed) {
11159 if (ctxt->errNo == 0)
11160 ret = 1;
11161 else
11162 ret = ctxt->errNo;
11163 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011164 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011165 xmlNodePtr cur;
11166
11167 /*
11168 * Return the newly created nodeset after unlinking it from
11169 * they pseudo parent.
11170 */
11171 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011172 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011173 while (cur != NULL) {
11174 cur->parent = NULL;
11175 cur = cur->next;
11176 }
11177 newDoc->children->children = NULL;
11178 }
11179 ret = 0;
11180 }
11181 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011182 ctxt->dict = NULL;
11183 ctxt->attsDefault = NULL;
11184 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011185 xmlFreeParserCtxt(ctxt);
11186 newDoc->intSubset = NULL;
11187 newDoc->extSubset = NULL;
11188 xmlFreeDoc(newDoc);
11189
11190 return(ret);
11191}
11192
11193/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011194 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011195 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011196 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011197 * @sax: the SAX handler bloc (possibly NULL)
11198 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11199 * @depth: Used for loop detection, use 0
11200 * @URL: the URL for the entity to load
11201 * @ID: the System ID for the entity to load
11202 * @list: the return value for the set of parsed nodes
11203 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011204 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011205 *
11206 * Returns 0 if the entity is well formed, -1 in case of args problem and
11207 * the parser error code otherwise
11208 */
11209
Daniel Veillard7d515752003-09-26 19:12:37 +000011210static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011211xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11212 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011213 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011214 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011215 xmlParserCtxtPtr ctxt;
11216 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011217 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011218 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011219 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011220 xmlChar start[4];
11221 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011222
11223 if (depth > 40) {
11224 return(XML_ERR_ENTITY_LOOP);
11225 }
11226
11227
11228
11229 if (list != NULL)
11230 *list = NULL;
11231 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011232 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011233 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011234 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011235
11236
11237 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011238 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011239 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011240 if (oldctxt != NULL) {
11241 ctxt->_private = oldctxt->_private;
11242 ctxt->loadsubset = oldctxt->loadsubset;
11243 ctxt->validate = oldctxt->validate;
11244 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011245 ctxt->record_info = oldctxt->record_info;
11246 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11247 ctxt->node_seq.length = oldctxt->node_seq.length;
11248 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011249 } else {
11250 /*
11251 * Doing validity checking on chunk without context
11252 * doesn't make sense
11253 */
11254 ctxt->_private = NULL;
11255 ctxt->validate = 0;
11256 ctxt->external = 2;
11257 ctxt->loadsubset = 0;
11258 }
Owen Taylor3473f882001-02-23 17:55:21 +000011259 if (sax != NULL) {
11260 oldsax = ctxt->sax;
11261 ctxt->sax = sax;
11262 if (user_data != NULL)
11263 ctxt->userData = user_data;
11264 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011265 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011266 newDoc = xmlNewDoc(BAD_CAST "1.0");
11267 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011268 ctxt->node_seq.maximum = 0;
11269 ctxt->node_seq.length = 0;
11270 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011271 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011272 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011273 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011274 newDoc->intSubset = doc->intSubset;
11275 newDoc->extSubset = doc->extSubset;
11276 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011277 xmlDictReference(newDoc->dict);
11278
Owen Taylor3473f882001-02-23 17:55:21 +000011279 if (doc->URL != NULL) {
11280 newDoc->URL = xmlStrdup(doc->URL);
11281 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011282 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11283 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011284 if (sax != NULL)
11285 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011286 ctxt->node_seq.maximum = 0;
11287 ctxt->node_seq.length = 0;
11288 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011289 xmlFreeParserCtxt(ctxt);
11290 newDoc->intSubset = NULL;
11291 newDoc->extSubset = NULL;
11292 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011293 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011294 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011295 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011296 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011297 ctxt->myDoc = doc;
11298 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011299
Daniel Veillard87a764e2001-06-20 17:41:10 +000011300 /*
11301 * Get the 4 first bytes and decode the charset
11302 * if enc != XML_CHAR_ENCODING_NONE
11303 * plug some encoding conversion routines.
11304 */
11305 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011306 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11307 start[0] = RAW;
11308 start[1] = NXT(1);
11309 start[2] = NXT(2);
11310 start[3] = NXT(3);
11311 enc = xmlDetectCharEncoding(start, 4);
11312 if (enc != XML_CHAR_ENCODING_NONE) {
11313 xmlSwitchEncoding(ctxt, enc);
11314 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011315 }
11316
Owen Taylor3473f882001-02-23 17:55:21 +000011317 /*
11318 * Parse a possible text declaration first
11319 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011320 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011321 xmlParseTextDecl(ctxt);
11322 }
11323
Owen Taylor3473f882001-02-23 17:55:21 +000011324 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011325 ctxt->depth = depth;
11326
11327 xmlParseContent(ctxt);
11328
Daniel Veillard561b7f82002-03-20 21:55:57 +000011329 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011330 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011331 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011332 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011333 }
11334 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011335 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011336 }
11337
11338 if (!ctxt->wellFormed) {
11339 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011340 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011341 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011342 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011343 } else {
11344 if (list != NULL) {
11345 xmlNodePtr cur;
11346
11347 /*
11348 * Return the newly created nodeset after unlinking it from
11349 * they pseudo parent.
11350 */
11351 cur = newDoc->children->children;
11352 *list = cur;
11353 while (cur != NULL) {
11354 cur->parent = NULL;
11355 cur = cur->next;
11356 }
11357 newDoc->children->children = NULL;
11358 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011359 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011360 }
11361 if (sax != NULL)
11362 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011363 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11364 oldctxt->node_seq.length = ctxt->node_seq.length;
11365 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011366 ctxt->node_seq.maximum = 0;
11367 ctxt->node_seq.length = 0;
11368 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011369 xmlFreeParserCtxt(ctxt);
11370 newDoc->intSubset = NULL;
11371 newDoc->extSubset = NULL;
11372 xmlFreeDoc(newDoc);
11373
11374 return(ret);
11375}
11376
Daniel Veillard81273902003-09-30 00:43:48 +000011377#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011378/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011379 * xmlParseExternalEntity:
11380 * @doc: the document the chunk pertains to
11381 * @sax: the SAX handler bloc (possibly NULL)
11382 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11383 * @depth: Used for loop detection, use 0
11384 * @URL: the URL for the entity to load
11385 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011386 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011387 *
11388 * Parse an external general entity
11389 * An external general parsed entity is well-formed if it matches the
11390 * production labeled extParsedEnt.
11391 *
11392 * [78] extParsedEnt ::= TextDecl? content
11393 *
11394 * Returns 0 if the entity is well formed, -1 in case of args problem and
11395 * the parser error code otherwise
11396 */
11397
11398int
11399xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011400 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011401 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011402 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011403}
11404
11405/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011406 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011407 * @doc: the document the chunk pertains to
11408 * @sax: the SAX handler bloc (possibly NULL)
11409 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11410 * @depth: Used for loop detection, use 0
11411 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011412 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011413 *
11414 * Parse a well-balanced chunk of an XML document
11415 * called by the parser
11416 * The allowed sequence for the Well Balanced Chunk is the one defined by
11417 * the content production in the XML grammar:
11418 *
11419 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11420 *
11421 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11422 * the parser error code otherwise
11423 */
11424
11425int
11426xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011427 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011428 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11429 depth, string, lst, 0 );
11430}
Daniel Veillard81273902003-09-30 00:43:48 +000011431#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011432
11433/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011434 * xmlParseBalancedChunkMemoryInternal:
11435 * @oldctxt: the existing parsing context
11436 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11437 * @user_data: the user data field for the parser context
11438 * @lst: the return value for the set of parsed nodes
11439 *
11440 *
11441 * Parse a well-balanced chunk of an XML document
11442 * called by the parser
11443 * The allowed sequence for the Well Balanced Chunk is the one defined by
11444 * the content production in the XML grammar:
11445 *
11446 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11447 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011448 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11449 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011450 *
11451 * In case recover is set to 1, the nodelist will not be empty even if
11452 * the parsed chunk is not well balanced.
11453 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011454static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011455xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11456 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11457 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011458 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011459 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011460 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011461 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011462 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011463 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011464 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011465
11466 if (oldctxt->depth > 40) {
11467 return(XML_ERR_ENTITY_LOOP);
11468 }
11469
11470
11471 if (lst != NULL)
11472 *lst = NULL;
11473 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011474 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011475
11476 size = xmlStrlen(string);
11477
11478 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011479 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011480 if (user_data != NULL)
11481 ctxt->userData = user_data;
11482 else
11483 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011484 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11485 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011486 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11487 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11488 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011489
11490 oldsax = ctxt->sax;
11491 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011492 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011493 ctxt->replaceEntities = oldctxt->replaceEntities;
11494 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011495
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011496 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011497 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011498 newDoc = xmlNewDoc(BAD_CAST "1.0");
11499 if (newDoc == NULL) {
11500 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011501 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011502 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011503 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011504 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011505 newDoc->dict = ctxt->dict;
11506 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011507 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011508 } else {
11509 ctxt->myDoc = oldctxt->myDoc;
11510 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011511 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011512 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011513 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11514 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011515 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011516 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011517 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011518 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011519 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011520 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011521 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011522 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011523 ctxt->myDoc->children = NULL;
11524 ctxt->myDoc->last = NULL;
11525 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011526 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011527 ctxt->instate = XML_PARSER_CONTENT;
11528 ctxt->depth = oldctxt->depth + 1;
11529
Daniel Veillard328f48c2002-11-15 15:24:34 +000011530 ctxt->validate = 0;
11531 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011532 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11533 /*
11534 * ID/IDREF registration will be done in xmlValidateElement below
11535 */
11536 ctxt->loadsubset |= XML_SKIP_IDS;
11537 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011538 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011539 ctxt->attsDefault = oldctxt->attsDefault;
11540 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011541
Daniel Veillard68e9e742002-11-16 15:35:11 +000011542 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011543 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011544 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011545 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011546 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011547 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011548 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011549 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011550 }
11551
11552 if (!ctxt->wellFormed) {
11553 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011554 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011555 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011556 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011557 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011558 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011559 }
11560
William M. Brack7b9154b2003-09-27 19:23:50 +000011561 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011562 xmlNodePtr cur;
11563
11564 /*
11565 * Return the newly created nodeset after unlinking it from
11566 * they pseudo parent.
11567 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011568 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011569 *lst = cur;
11570 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011571#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011572 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11573 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11574 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011575 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11576 oldctxt->myDoc, cur);
11577 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011578#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011579 cur->parent = NULL;
11580 cur = cur->next;
11581 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011582 ctxt->myDoc->children->children = NULL;
11583 }
11584 if (ctxt->myDoc != NULL) {
11585 xmlFreeNode(ctxt->myDoc->children);
11586 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011587 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011588 }
11589
11590 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011591 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011592 ctxt->attsDefault = NULL;
11593 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011594 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011595 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011596 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011597 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011598
11599 return(ret);
11600}
11601
Daniel Veillard29b17482004-08-16 00:39:03 +000011602/**
11603 * xmlParseInNodeContext:
11604 * @node: the context node
11605 * @data: the input string
11606 * @datalen: the input string length in bytes
11607 * @options: a combination of xmlParserOption
11608 * @lst: the return value for the set of parsed nodes
11609 *
11610 * Parse a well-balanced chunk of an XML document
11611 * within the context (DTD, namespaces, etc ...) of the given node.
11612 *
11613 * The allowed sequence for the data is a Well Balanced Chunk defined by
11614 * the content production in the XML grammar:
11615 *
11616 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11617 *
11618 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11619 * error code otherwise
11620 */
11621xmlParserErrors
11622xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11623 int options, xmlNodePtr *lst) {
11624#ifdef SAX2
11625 xmlParserCtxtPtr ctxt;
11626 xmlDocPtr doc = NULL;
11627 xmlNodePtr fake, cur;
11628 int nsnr = 0;
11629
11630 xmlParserErrors ret = XML_ERR_OK;
11631
11632 /*
11633 * check all input parameters, grab the document
11634 */
11635 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11636 return(XML_ERR_INTERNAL_ERROR);
11637 switch (node->type) {
11638 case XML_ELEMENT_NODE:
11639 case XML_ATTRIBUTE_NODE:
11640 case XML_TEXT_NODE:
11641 case XML_CDATA_SECTION_NODE:
11642 case XML_ENTITY_REF_NODE:
11643 case XML_PI_NODE:
11644 case XML_COMMENT_NODE:
11645 case XML_DOCUMENT_NODE:
11646 case XML_HTML_DOCUMENT_NODE:
11647 break;
11648 default:
11649 return(XML_ERR_INTERNAL_ERROR);
11650
11651 }
11652 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11653 (node->type != XML_DOCUMENT_NODE) &&
11654 (node->type != XML_HTML_DOCUMENT_NODE))
11655 node = node->parent;
11656 if (node == NULL)
11657 return(XML_ERR_INTERNAL_ERROR);
11658 if (node->type == XML_ELEMENT_NODE)
11659 doc = node->doc;
11660 else
11661 doc = (xmlDocPtr) node;
11662 if (doc == NULL)
11663 return(XML_ERR_INTERNAL_ERROR);
11664
11665 /*
11666 * allocate a context and set-up everything not related to the
11667 * node position in the tree
11668 */
11669 if (doc->type == XML_DOCUMENT_NODE)
11670 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11671#ifdef LIBXML_HTML_ENABLED
11672 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11673 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11674#endif
11675 else
11676 return(XML_ERR_INTERNAL_ERROR);
11677
11678 if (ctxt == NULL)
11679 return(XML_ERR_NO_MEMORY);
11680 fake = xmlNewComment(NULL);
11681 if (fake == NULL) {
11682 xmlFreeParserCtxt(ctxt);
11683 return(XML_ERR_NO_MEMORY);
11684 }
11685 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011686
11687 /*
11688 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11689 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11690 * we must wait until the last moment to free the original one.
11691 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011692 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011693 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011694 xmlDictFree(ctxt->dict);
11695 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011696 } else
11697 options |= XML_PARSE_NODICT;
11698
11699 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011700 xmlDetectSAX2(ctxt);
11701 ctxt->myDoc = doc;
11702
11703 if (node->type == XML_ELEMENT_NODE) {
11704 nodePush(ctxt, node);
11705 /*
11706 * initialize the SAX2 namespaces stack
11707 */
11708 cur = node;
11709 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11710 xmlNsPtr ns = cur->nsDef;
11711 const xmlChar *iprefix, *ihref;
11712
11713 while (ns != NULL) {
11714 if (ctxt->dict) {
11715 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11716 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11717 } else {
11718 iprefix = ns->prefix;
11719 ihref = ns->href;
11720 }
11721
11722 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11723 nsPush(ctxt, iprefix, ihref);
11724 nsnr++;
11725 }
11726 ns = ns->next;
11727 }
11728 cur = cur->parent;
11729 }
11730 ctxt->instate = XML_PARSER_CONTENT;
11731 }
11732
11733 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11734 /*
11735 * ID/IDREF registration will be done in xmlValidateElement below
11736 */
11737 ctxt->loadsubset |= XML_SKIP_IDS;
11738 }
11739
Daniel Veillard499cc922006-01-18 17:22:35 +000011740#ifdef LIBXML_HTML_ENABLED
11741 if (doc->type == XML_HTML_DOCUMENT_NODE)
11742 __htmlParseContent(ctxt);
11743 else
11744#endif
11745 xmlParseContent(ctxt);
11746
Daniel Veillard29b17482004-08-16 00:39:03 +000011747 nsPop(ctxt, nsnr);
11748 if ((RAW == '<') && (NXT(1) == '/')) {
11749 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11750 } else if (RAW != 0) {
11751 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11752 }
11753 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11754 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11755 ctxt->wellFormed = 0;
11756 }
11757
11758 if (!ctxt->wellFormed) {
11759 if (ctxt->errNo == 0)
11760 ret = XML_ERR_INTERNAL_ERROR;
11761 else
11762 ret = (xmlParserErrors)ctxt->errNo;
11763 } else {
11764 ret = XML_ERR_OK;
11765 }
11766
11767 /*
11768 * Return the newly created nodeset after unlinking it from
11769 * the pseudo sibling.
11770 */
11771
11772 cur = fake->next;
11773 fake->next = NULL;
11774 node->last = fake;
11775
11776 if (cur != NULL) {
11777 cur->prev = NULL;
11778 }
11779
11780 *lst = cur;
11781
11782 while (cur != NULL) {
11783 cur->parent = NULL;
11784 cur = cur->next;
11785 }
11786
11787 xmlUnlinkNode(fake);
11788 xmlFreeNode(fake);
11789
11790
11791 if (ret != XML_ERR_OK) {
11792 xmlFreeNodeList(*lst);
11793 *lst = NULL;
11794 }
William M. Brackc3f81342004-10-03 01:22:44 +000011795
William M. Brackb7b54de2004-10-06 16:38:01 +000011796 if (doc->dict != NULL)
11797 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011798 xmlFreeParserCtxt(ctxt);
11799
11800 return(ret);
11801#else /* !SAX2 */
11802 return(XML_ERR_INTERNAL_ERROR);
11803#endif
11804}
11805
Daniel Veillard81273902003-09-30 00:43:48 +000011806#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011807/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011808 * xmlParseBalancedChunkMemoryRecover:
11809 * @doc: the document the chunk pertains to
11810 * @sax: the SAX handler bloc (possibly NULL)
11811 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11812 * @depth: Used for loop detection, use 0
11813 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11814 * @lst: the return value for the set of parsed nodes
11815 * @recover: return nodes even if the data is broken (use 0)
11816 *
11817 *
11818 * Parse a well-balanced chunk of an XML document
11819 * called by the parser
11820 * The allowed sequence for the Well Balanced Chunk is the one defined by
11821 * the content production in the XML grammar:
11822 *
11823 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11824 *
11825 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11826 * the parser error code otherwise
11827 *
11828 * In case recover is set to 1, the nodelist will not be empty even if
11829 * the parsed chunk is not well balanced.
11830 */
11831int
11832xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11833 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11834 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011835 xmlParserCtxtPtr ctxt;
11836 xmlDocPtr newDoc;
11837 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011838 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011839 int size;
11840 int ret = 0;
11841
11842 if (depth > 40) {
11843 return(XML_ERR_ENTITY_LOOP);
11844 }
11845
11846
Daniel Veillardcda96922001-08-21 10:56:31 +000011847 if (lst != NULL)
11848 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011849 if (string == NULL)
11850 return(-1);
11851
11852 size = xmlStrlen(string);
11853
11854 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11855 if (ctxt == NULL) return(-1);
11856 ctxt->userData = ctxt;
11857 if (sax != NULL) {
11858 oldsax = ctxt->sax;
11859 ctxt->sax = sax;
11860 if (user_data != NULL)
11861 ctxt->userData = user_data;
11862 }
11863 newDoc = xmlNewDoc(BAD_CAST "1.0");
11864 if (newDoc == NULL) {
11865 xmlFreeParserCtxt(ctxt);
11866 return(-1);
11867 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011868 if ((doc != NULL) && (doc->dict != NULL)) {
11869 xmlDictFree(ctxt->dict);
11870 ctxt->dict = doc->dict;
11871 xmlDictReference(ctxt->dict);
11872 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11873 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11874 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11875 ctxt->dictNames = 1;
11876 } else {
11877 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11878 }
Owen Taylor3473f882001-02-23 17:55:21 +000011879 if (doc != NULL) {
11880 newDoc->intSubset = doc->intSubset;
11881 newDoc->extSubset = doc->extSubset;
11882 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011883 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11884 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011885 if (sax != NULL)
11886 ctxt->sax = oldsax;
11887 xmlFreeParserCtxt(ctxt);
11888 newDoc->intSubset = NULL;
11889 newDoc->extSubset = NULL;
11890 xmlFreeDoc(newDoc);
11891 return(-1);
11892 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011893 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11894 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011895 if (doc == NULL) {
11896 ctxt->myDoc = newDoc;
11897 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011898 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011899 newDoc->children->doc = doc;
11900 }
11901 ctxt->instate = XML_PARSER_CONTENT;
11902 ctxt->depth = depth;
11903
11904 /*
11905 * Doing validity checking on chunk doesn't make sense
11906 */
11907 ctxt->validate = 0;
11908 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011909 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011910
Daniel Veillardb39bc392002-10-26 19:29:51 +000011911 if ( doc != NULL ){
11912 content = doc->children;
11913 doc->children = NULL;
11914 xmlParseContent(ctxt);
11915 doc->children = content;
11916 }
11917 else {
11918 xmlParseContent(ctxt);
11919 }
Owen Taylor3473f882001-02-23 17:55:21 +000011920 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011921 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011922 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011923 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011924 }
11925 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011927 }
11928
11929 if (!ctxt->wellFormed) {
11930 if (ctxt->errNo == 0)
11931 ret = 1;
11932 else
11933 ret = ctxt->errNo;
11934 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011935 ret = 0;
11936 }
11937
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011938 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11939 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011940
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011941 /*
11942 * Return the newly created nodeset after unlinking it from
11943 * they pseudo parent.
11944 */
11945 cur = newDoc->children->children;
11946 *lst = cur;
11947 while (cur != NULL) {
11948 xmlSetTreeDoc(cur, doc);
11949 cur->parent = NULL;
11950 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011951 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011952 newDoc->children->children = NULL;
11953 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011954
Owen Taylor3473f882001-02-23 17:55:21 +000011955 if (sax != NULL)
11956 ctxt->sax = oldsax;
11957 xmlFreeParserCtxt(ctxt);
11958 newDoc->intSubset = NULL;
11959 newDoc->extSubset = NULL;
11960 xmlFreeDoc(newDoc);
11961
11962 return(ret);
11963}
11964
11965/**
11966 * xmlSAXParseEntity:
11967 * @sax: the SAX handler block
11968 * @filename: the filename
11969 *
11970 * parse an XML external entity out of context and build a tree.
11971 * It use the given SAX function block to handle the parsing callback.
11972 * If sax is NULL, fallback to the default DOM tree building routines.
11973 *
11974 * [78] extParsedEnt ::= TextDecl? content
11975 *
11976 * This correspond to a "Well Balanced" chunk
11977 *
11978 * Returns the resulting document tree
11979 */
11980
11981xmlDocPtr
11982xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11983 xmlDocPtr ret;
11984 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011985
11986 ctxt = xmlCreateFileParserCtxt(filename);
11987 if (ctxt == NULL) {
11988 return(NULL);
11989 }
11990 if (sax != NULL) {
11991 if (ctxt->sax != NULL)
11992 xmlFree(ctxt->sax);
11993 ctxt->sax = sax;
11994 ctxt->userData = NULL;
11995 }
11996
Owen Taylor3473f882001-02-23 17:55:21 +000011997 xmlParseExtParsedEnt(ctxt);
11998
11999 if (ctxt->wellFormed)
12000 ret = ctxt->myDoc;
12001 else {
12002 ret = NULL;
12003 xmlFreeDoc(ctxt->myDoc);
12004 ctxt->myDoc = NULL;
12005 }
12006 if (sax != NULL)
12007 ctxt->sax = NULL;
12008 xmlFreeParserCtxt(ctxt);
12009
12010 return(ret);
12011}
12012
12013/**
12014 * xmlParseEntity:
12015 * @filename: the filename
12016 *
12017 * parse an XML external entity out of context and build a tree.
12018 *
12019 * [78] extParsedEnt ::= TextDecl? content
12020 *
12021 * This correspond to a "Well Balanced" chunk
12022 *
12023 * Returns the resulting document tree
12024 */
12025
12026xmlDocPtr
12027xmlParseEntity(const char *filename) {
12028 return(xmlSAXParseEntity(NULL, filename));
12029}
Daniel Veillard81273902003-09-30 00:43:48 +000012030#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012031
12032/**
12033 * xmlCreateEntityParserCtxt:
12034 * @URL: the entity URL
12035 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012036 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012037 *
12038 * Create a parser context for an external entity
12039 * Automatic support for ZLIB/Compress compressed document is provided
12040 * by default if found at compile-time.
12041 *
12042 * Returns the new parser context or NULL
12043 */
12044xmlParserCtxtPtr
12045xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12046 const xmlChar *base) {
12047 xmlParserCtxtPtr ctxt;
12048 xmlParserInputPtr inputStream;
12049 char *directory = NULL;
12050 xmlChar *uri;
12051
12052 ctxt = xmlNewParserCtxt();
12053 if (ctxt == NULL) {
12054 return(NULL);
12055 }
12056
12057 uri = xmlBuildURI(URL, base);
12058
12059 if (uri == NULL) {
12060 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12061 if (inputStream == NULL) {
12062 xmlFreeParserCtxt(ctxt);
12063 return(NULL);
12064 }
12065
12066 inputPush(ctxt, inputStream);
12067
12068 if ((ctxt->directory == NULL) && (directory == NULL))
12069 directory = xmlParserGetDirectory((char *)URL);
12070 if ((ctxt->directory == NULL) && (directory != NULL))
12071 ctxt->directory = directory;
12072 } else {
12073 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12074 if (inputStream == NULL) {
12075 xmlFree(uri);
12076 xmlFreeParserCtxt(ctxt);
12077 return(NULL);
12078 }
12079
12080 inputPush(ctxt, inputStream);
12081
12082 if ((ctxt->directory == NULL) && (directory == NULL))
12083 directory = xmlParserGetDirectory((char *)uri);
12084 if ((ctxt->directory == NULL) && (directory != NULL))
12085 ctxt->directory = directory;
12086 xmlFree(uri);
12087 }
Owen Taylor3473f882001-02-23 17:55:21 +000012088 return(ctxt);
12089}
12090
12091/************************************************************************
12092 * *
12093 * Front ends when parsing from a file *
12094 * *
12095 ************************************************************************/
12096
12097/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012098 * xmlCreateURLParserCtxt:
12099 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012100 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012101 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012102 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012103 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012104 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012105 *
12106 * Returns the new parser context or NULL
12107 */
12108xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012109xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012110{
12111 xmlParserCtxtPtr ctxt;
12112 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012113 char *directory = NULL;
12114
Owen Taylor3473f882001-02-23 17:55:21 +000012115 ctxt = xmlNewParserCtxt();
12116 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012117 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012118 return(NULL);
12119 }
12120
Daniel Veillarddf292f72005-01-16 19:00:15 +000012121 if (options)
12122 xmlCtxtUseOptions(ctxt, options);
12123 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012124
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012125 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012126 if (inputStream == NULL) {
12127 xmlFreeParserCtxt(ctxt);
12128 return(NULL);
12129 }
12130
Owen Taylor3473f882001-02-23 17:55:21 +000012131 inputPush(ctxt, inputStream);
12132 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012133 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012134 if ((ctxt->directory == NULL) && (directory != NULL))
12135 ctxt->directory = directory;
12136
12137 return(ctxt);
12138}
12139
Daniel Veillard61b93382003-11-03 14:28:31 +000012140/**
12141 * xmlCreateFileParserCtxt:
12142 * @filename: the filename
12143 *
12144 * Create a parser context for a file content.
12145 * Automatic support for ZLIB/Compress compressed document is provided
12146 * by default if found at compile-time.
12147 *
12148 * Returns the new parser context or NULL
12149 */
12150xmlParserCtxtPtr
12151xmlCreateFileParserCtxt(const char *filename)
12152{
12153 return(xmlCreateURLParserCtxt(filename, 0));
12154}
12155
Daniel Veillard81273902003-09-30 00:43:48 +000012156#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012157/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012158 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012159 * @sax: the SAX handler block
12160 * @filename: the filename
12161 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12162 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012163 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012164 *
12165 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12166 * compressed document is provided by default if found at compile-time.
12167 * It use the given SAX function block to handle the parsing callback.
12168 * If sax is NULL, fallback to the default DOM tree building routines.
12169 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012170 * User data (void *) is stored within the parser context in the
12171 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012172 *
Owen Taylor3473f882001-02-23 17:55:21 +000012173 * Returns the resulting document tree
12174 */
12175
12176xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012177xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12178 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012179 xmlDocPtr ret;
12180 xmlParserCtxtPtr ctxt;
12181 char *directory = NULL;
12182
Daniel Veillard635ef722001-10-29 11:48:19 +000012183 xmlInitParser();
12184
Owen Taylor3473f882001-02-23 17:55:21 +000012185 ctxt = xmlCreateFileParserCtxt(filename);
12186 if (ctxt == NULL) {
12187 return(NULL);
12188 }
12189 if (sax != NULL) {
12190 if (ctxt->sax != NULL)
12191 xmlFree(ctxt->sax);
12192 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012193 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012194 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012195 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012196 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012197 }
Owen Taylor3473f882001-02-23 17:55:21 +000012198
12199 if ((ctxt->directory == NULL) && (directory == NULL))
12200 directory = xmlParserGetDirectory(filename);
12201 if ((ctxt->directory == NULL) && (directory != NULL))
12202 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12203
Daniel Veillarddad3f682002-11-17 16:47:27 +000012204 ctxt->recovery = recovery;
12205
Owen Taylor3473f882001-02-23 17:55:21 +000012206 xmlParseDocument(ctxt);
12207
William M. Brackc07329e2003-09-08 01:57:30 +000012208 if ((ctxt->wellFormed) || recovery) {
12209 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012210 if (ret != NULL) {
12211 if (ctxt->input->buf->compressed > 0)
12212 ret->compression = 9;
12213 else
12214 ret->compression = ctxt->input->buf->compressed;
12215 }
William M. Brackc07329e2003-09-08 01:57:30 +000012216 }
Owen Taylor3473f882001-02-23 17:55:21 +000012217 else {
12218 ret = NULL;
12219 xmlFreeDoc(ctxt->myDoc);
12220 ctxt->myDoc = NULL;
12221 }
12222 if (sax != NULL)
12223 ctxt->sax = NULL;
12224 xmlFreeParserCtxt(ctxt);
12225
12226 return(ret);
12227}
12228
12229/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012230 * xmlSAXParseFile:
12231 * @sax: the SAX handler block
12232 * @filename: the filename
12233 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12234 * documents
12235 *
12236 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12237 * compressed document is provided by default if found at compile-time.
12238 * It use the given SAX function block to handle the parsing callback.
12239 * If sax is NULL, fallback to the default DOM tree building routines.
12240 *
12241 * Returns the resulting document tree
12242 */
12243
12244xmlDocPtr
12245xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12246 int recovery) {
12247 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12248}
12249
12250/**
Owen Taylor3473f882001-02-23 17:55:21 +000012251 * xmlRecoverDoc:
12252 * @cur: a pointer to an array of xmlChar
12253 *
12254 * parse an XML in-memory document and build a tree.
12255 * In the case the document is not Well Formed, a tree is built anyway
12256 *
12257 * Returns the resulting document tree
12258 */
12259
12260xmlDocPtr
12261xmlRecoverDoc(xmlChar *cur) {
12262 return(xmlSAXParseDoc(NULL, cur, 1));
12263}
12264
12265/**
12266 * xmlParseFile:
12267 * @filename: the filename
12268 *
12269 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12270 * compressed document is provided by default if found at compile-time.
12271 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012272 * Returns the resulting document tree if the file was wellformed,
12273 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012274 */
12275
12276xmlDocPtr
12277xmlParseFile(const char *filename) {
12278 return(xmlSAXParseFile(NULL, filename, 0));
12279}
12280
12281/**
12282 * xmlRecoverFile:
12283 * @filename: the filename
12284 *
12285 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12286 * compressed document is provided by default if found at compile-time.
12287 * In the case the document is not Well Formed, a tree is built anyway
12288 *
12289 * Returns the resulting document tree
12290 */
12291
12292xmlDocPtr
12293xmlRecoverFile(const char *filename) {
12294 return(xmlSAXParseFile(NULL, filename, 1));
12295}
12296
12297
12298/**
12299 * xmlSetupParserForBuffer:
12300 * @ctxt: an XML parser context
12301 * @buffer: a xmlChar * buffer
12302 * @filename: a file name
12303 *
12304 * Setup the parser context to parse a new buffer; Clears any prior
12305 * contents from the parser context. The buffer parameter must not be
12306 * NULL, but the filename parameter can be
12307 */
12308void
12309xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12310 const char* filename)
12311{
12312 xmlParserInputPtr input;
12313
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012314 if ((ctxt == NULL) || (buffer == NULL))
12315 return;
12316
Owen Taylor3473f882001-02-23 17:55:21 +000012317 input = xmlNewInputStream(ctxt);
12318 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012319 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012320 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012321 return;
12322 }
12323
12324 xmlClearParserCtxt(ctxt);
12325 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012326 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012327 input->base = buffer;
12328 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012329 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012330 inputPush(ctxt, input);
12331}
12332
12333/**
12334 * xmlSAXUserParseFile:
12335 * @sax: a SAX handler
12336 * @user_data: The user data returned on SAX callbacks
12337 * @filename: a file name
12338 *
12339 * parse an XML file and call the given SAX handler routines.
12340 * Automatic support for ZLIB/Compress compressed document is provided
12341 *
12342 * Returns 0 in case of success or a error number otherwise
12343 */
12344int
12345xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12346 const char *filename) {
12347 int ret = 0;
12348 xmlParserCtxtPtr ctxt;
12349
12350 ctxt = xmlCreateFileParserCtxt(filename);
12351 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012352#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012353 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012354#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012355 xmlFree(ctxt->sax);
12356 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012357 xmlDetectSAX2(ctxt);
12358
Owen Taylor3473f882001-02-23 17:55:21 +000012359 if (user_data != NULL)
12360 ctxt->userData = user_data;
12361
12362 xmlParseDocument(ctxt);
12363
12364 if (ctxt->wellFormed)
12365 ret = 0;
12366 else {
12367 if (ctxt->errNo != 0)
12368 ret = ctxt->errNo;
12369 else
12370 ret = -1;
12371 }
12372 if (sax != NULL)
12373 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012374 if (ctxt->myDoc != NULL) {
12375 xmlFreeDoc(ctxt->myDoc);
12376 ctxt->myDoc = NULL;
12377 }
Owen Taylor3473f882001-02-23 17:55:21 +000012378 xmlFreeParserCtxt(ctxt);
12379
12380 return ret;
12381}
Daniel Veillard81273902003-09-30 00:43:48 +000012382#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012383
12384/************************************************************************
12385 * *
12386 * Front ends when parsing from memory *
12387 * *
12388 ************************************************************************/
12389
12390/**
12391 * xmlCreateMemoryParserCtxt:
12392 * @buffer: a pointer to a char array
12393 * @size: the size of the array
12394 *
12395 * Create a parser context for an XML in-memory document.
12396 *
12397 * Returns the new parser context or NULL
12398 */
12399xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012400xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012401 xmlParserCtxtPtr ctxt;
12402 xmlParserInputPtr input;
12403 xmlParserInputBufferPtr buf;
12404
12405 if (buffer == NULL)
12406 return(NULL);
12407 if (size <= 0)
12408 return(NULL);
12409
12410 ctxt = xmlNewParserCtxt();
12411 if (ctxt == NULL)
12412 return(NULL);
12413
Daniel Veillard53350552003-09-18 13:35:51 +000012414 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012415 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012416 if (buf == NULL) {
12417 xmlFreeParserCtxt(ctxt);
12418 return(NULL);
12419 }
Owen Taylor3473f882001-02-23 17:55:21 +000012420
12421 input = xmlNewInputStream(ctxt);
12422 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012423 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012424 xmlFreeParserCtxt(ctxt);
12425 return(NULL);
12426 }
12427
12428 input->filename = NULL;
12429 input->buf = buf;
12430 input->base = input->buf->buffer->content;
12431 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012432 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012433
12434 inputPush(ctxt, input);
12435 return(ctxt);
12436}
12437
Daniel Veillard81273902003-09-30 00:43:48 +000012438#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012439/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012440 * xmlSAXParseMemoryWithData:
12441 * @sax: the SAX handler block
12442 * @buffer: an pointer to a char array
12443 * @size: the size of the array
12444 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12445 * documents
12446 * @data: the userdata
12447 *
12448 * parse an XML in-memory block and use the given SAX function block
12449 * to handle the parsing callback. If sax is NULL, fallback to the default
12450 * DOM tree building routines.
12451 *
12452 * User data (void *) is stored within the parser context in the
12453 * context's _private member, so it is available nearly everywhere in libxml
12454 *
12455 * Returns the resulting document tree
12456 */
12457
12458xmlDocPtr
12459xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12460 int size, int recovery, void *data) {
12461 xmlDocPtr ret;
12462 xmlParserCtxtPtr ctxt;
12463
12464 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12465 if (ctxt == NULL) return(NULL);
12466 if (sax != NULL) {
12467 if (ctxt->sax != NULL)
12468 xmlFree(ctxt->sax);
12469 ctxt->sax = sax;
12470 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012471 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012472 if (data!=NULL) {
12473 ctxt->_private=data;
12474 }
12475
Daniel Veillardadba5f12003-04-04 16:09:01 +000012476 ctxt->recovery = recovery;
12477
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012478 xmlParseDocument(ctxt);
12479
12480 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12481 else {
12482 ret = NULL;
12483 xmlFreeDoc(ctxt->myDoc);
12484 ctxt->myDoc = NULL;
12485 }
12486 if (sax != NULL)
12487 ctxt->sax = NULL;
12488 xmlFreeParserCtxt(ctxt);
12489
12490 return(ret);
12491}
12492
12493/**
Owen Taylor3473f882001-02-23 17:55:21 +000012494 * xmlSAXParseMemory:
12495 * @sax: the SAX handler block
12496 * @buffer: an pointer to a char array
12497 * @size: the size of the array
12498 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12499 * documents
12500 *
12501 * parse an XML in-memory block and use the given SAX function block
12502 * to handle the parsing callback. If sax is NULL, fallback to the default
12503 * DOM tree building routines.
12504 *
12505 * Returns the resulting document tree
12506 */
12507xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012508xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12509 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012510 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012511}
12512
12513/**
12514 * xmlParseMemory:
12515 * @buffer: an pointer to a char array
12516 * @size: the size of the array
12517 *
12518 * parse an XML in-memory block and build a tree.
12519 *
12520 * Returns the resulting document tree
12521 */
12522
Daniel Veillard50822cb2001-07-26 20:05:51 +000012523xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012524 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12525}
12526
12527/**
12528 * xmlRecoverMemory:
12529 * @buffer: an pointer to a char array
12530 * @size: the size of the array
12531 *
12532 * parse an XML in-memory block and build a tree.
12533 * In the case the document is not Well Formed, a tree is built anyway
12534 *
12535 * Returns the resulting document tree
12536 */
12537
Daniel Veillard50822cb2001-07-26 20:05:51 +000012538xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012539 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12540}
12541
12542/**
12543 * xmlSAXUserParseMemory:
12544 * @sax: a SAX handler
12545 * @user_data: The user data returned on SAX callbacks
12546 * @buffer: an in-memory XML document input
12547 * @size: the length of the XML document in bytes
12548 *
12549 * A better SAX parsing routine.
12550 * parse an XML in-memory buffer and call the given SAX handler routines.
12551 *
12552 * Returns 0 in case of success or a error number otherwise
12553 */
12554int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012555 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012556 int ret = 0;
12557 xmlParserCtxtPtr ctxt;
12558 xmlSAXHandlerPtr oldsax = NULL;
12559
Daniel Veillard9e923512002-08-14 08:48:52 +000012560 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012561 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12562 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012563 oldsax = ctxt->sax;
12564 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012565 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012566 if (user_data != NULL)
12567 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012568
12569 xmlParseDocument(ctxt);
12570
12571 if (ctxt->wellFormed)
12572 ret = 0;
12573 else {
12574 if (ctxt->errNo != 0)
12575 ret = ctxt->errNo;
12576 else
12577 ret = -1;
12578 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012579 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012580 if (ctxt->myDoc != NULL) {
12581 xmlFreeDoc(ctxt->myDoc);
12582 ctxt->myDoc = NULL;
12583 }
Owen Taylor3473f882001-02-23 17:55:21 +000012584 xmlFreeParserCtxt(ctxt);
12585
12586 return ret;
12587}
Daniel Veillard81273902003-09-30 00:43:48 +000012588#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012589
12590/**
12591 * xmlCreateDocParserCtxt:
12592 * @cur: a pointer to an array of xmlChar
12593 *
12594 * Creates a parser context for an XML in-memory document.
12595 *
12596 * Returns the new parser context or NULL
12597 */
12598xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012599xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012600 int len;
12601
12602 if (cur == NULL)
12603 return(NULL);
12604 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012605 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012606}
12607
Daniel Veillard81273902003-09-30 00:43:48 +000012608#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012609/**
12610 * xmlSAXParseDoc:
12611 * @sax: the SAX handler block
12612 * @cur: a pointer to an array of xmlChar
12613 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12614 * documents
12615 *
12616 * parse an XML in-memory document and build a tree.
12617 * It use the given SAX function block to handle the parsing callback.
12618 * If sax is NULL, fallback to the default DOM tree building routines.
12619 *
12620 * Returns the resulting document tree
12621 */
12622
12623xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012624xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012625 xmlDocPtr ret;
12626 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012627 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012628
Daniel Veillard38936062004-11-04 17:45:11 +000012629 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012630
12631
12632 ctxt = xmlCreateDocParserCtxt(cur);
12633 if (ctxt == NULL) return(NULL);
12634 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012635 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012636 ctxt->sax = sax;
12637 ctxt->userData = NULL;
12638 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012639 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012640
12641 xmlParseDocument(ctxt);
12642 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12643 else {
12644 ret = NULL;
12645 xmlFreeDoc(ctxt->myDoc);
12646 ctxt->myDoc = NULL;
12647 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012648 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012649 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012650 xmlFreeParserCtxt(ctxt);
12651
12652 return(ret);
12653}
12654
12655/**
12656 * xmlParseDoc:
12657 * @cur: a pointer to an array of xmlChar
12658 *
12659 * parse an XML in-memory document and build a tree.
12660 *
12661 * Returns the resulting document tree
12662 */
12663
12664xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012665xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012666 return(xmlSAXParseDoc(NULL, cur, 0));
12667}
Daniel Veillard81273902003-09-30 00:43:48 +000012668#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012669
Daniel Veillard81273902003-09-30 00:43:48 +000012670#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012671/************************************************************************
12672 * *
12673 * Specific function to keep track of entities references *
12674 * and used by the XSLT debugger *
12675 * *
12676 ************************************************************************/
12677
12678static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12679
12680/**
12681 * xmlAddEntityReference:
12682 * @ent : A valid entity
12683 * @firstNode : A valid first node for children of entity
12684 * @lastNode : A valid last node of children entity
12685 *
12686 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12687 */
12688static void
12689xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12690 xmlNodePtr lastNode)
12691{
12692 if (xmlEntityRefFunc != NULL) {
12693 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12694 }
12695}
12696
12697
12698/**
12699 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012700 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012701 *
12702 * Set the function to call call back when a xml reference has been made
12703 */
12704void
12705xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12706{
12707 xmlEntityRefFunc = func;
12708}
Daniel Veillard81273902003-09-30 00:43:48 +000012709#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012710
12711/************************************************************************
12712 * *
12713 * Miscellaneous *
12714 * *
12715 ************************************************************************/
12716
12717#ifdef LIBXML_XPATH_ENABLED
12718#include <libxml/xpath.h>
12719#endif
12720
Daniel Veillardffa3c742005-07-21 13:24:09 +000012721extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012722static int xmlParserInitialized = 0;
12723
12724/**
12725 * xmlInitParser:
12726 *
12727 * Initialization function for the XML parser.
12728 * This is not reentrant. Call once before processing in case of
12729 * use in multithreaded programs.
12730 */
12731
12732void
12733xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012734 if (xmlParserInitialized != 0)
12735 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012736
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012737 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12738 (xmlGenericError == NULL))
12739 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012740 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012741 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012742 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012743 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012744 xmlDefaultSAXHandlerInit();
12745 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012746#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012747 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012748#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012749#ifdef LIBXML_HTML_ENABLED
12750 htmlInitAutoClose();
12751 htmlDefaultSAXHandlerInit();
12752#endif
12753#ifdef LIBXML_XPATH_ENABLED
12754 xmlXPathInit();
12755#endif
12756 xmlParserInitialized = 1;
12757}
12758
12759/**
12760 * xmlCleanupParser:
12761 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012762 * Cleanup function for the XML library. It tries to reclaim all
12763 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012764 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012765 * function should not prevent reusing the library but one should
12766 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012767 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012768 */
12769
12770void
12771xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012772 if (!xmlParserInitialized)
12773 return;
12774
Owen Taylor3473f882001-02-23 17:55:21 +000012775 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012776#ifdef LIBXML_CATALOG_ENABLED
12777 xmlCatalogCleanup();
12778#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012779 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012780 xmlCleanupInputCallbacks();
12781#ifdef LIBXML_OUTPUT_ENABLED
12782 xmlCleanupOutputCallbacks();
12783#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012784#ifdef LIBXML_SCHEMAS_ENABLED
12785 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012786 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012787#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012788 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012789 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012790 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012791 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012792 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012793}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012794
12795/************************************************************************
12796 * *
12797 * New set (2.6.0) of simpler and more flexible APIs *
12798 * *
12799 ************************************************************************/
12800
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside an unbraced if/else); the caller
 * supplies the terminating semicolon. Note that @str is evaluated more
 * than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12812
12813/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012814 * xmlCtxtReset:
12815 * @ctxt: an XML parser context
12816 *
12817 * Reset a parser context
12818 */
12819void
12820xmlCtxtReset(xmlParserCtxtPtr ctxt)
12821{
12822 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012823 xmlDictPtr dict;
12824
12825 if (ctxt == NULL)
12826 return;
12827
12828 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012829
12830 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12831 xmlFreeInputStream(input);
12832 }
12833 ctxt->inputNr = 0;
12834 ctxt->input = NULL;
12835
12836 ctxt->spaceNr = 0;
12837 ctxt->spaceTab[0] = -1;
12838 ctxt->space = &ctxt->spaceTab[0];
12839
12840
12841 ctxt->nodeNr = 0;
12842 ctxt->node = NULL;
12843
12844 ctxt->nameNr = 0;
12845 ctxt->name = NULL;
12846
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012847 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012848 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012849 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012850 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012851 DICT_FREE(ctxt->directory);
12852 ctxt->directory = NULL;
12853 DICT_FREE(ctxt->extSubURI);
12854 ctxt->extSubURI = NULL;
12855 DICT_FREE(ctxt->extSubSystem);
12856 ctxt->extSubSystem = NULL;
12857 if (ctxt->myDoc != NULL)
12858 xmlFreeDoc(ctxt->myDoc);
12859 ctxt->myDoc = NULL;
12860
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012861 ctxt->standalone = -1;
12862 ctxt->hasExternalSubset = 0;
12863 ctxt->hasPErefs = 0;
12864 ctxt->html = 0;
12865 ctxt->external = 0;
12866 ctxt->instate = XML_PARSER_START;
12867 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012868
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012869 ctxt->wellFormed = 1;
12870 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012871 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012872 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012873#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012874 ctxt->vctxt.userData = ctxt;
12875 ctxt->vctxt.error = xmlParserValidityError;
12876 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012877#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012878 ctxt->record_info = 0;
12879 ctxt->nbChars = 0;
12880 ctxt->checkIndex = 0;
12881 ctxt->inSubset = 0;
12882 ctxt->errNo = XML_ERR_OK;
12883 ctxt->depth = 0;
12884 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12885 ctxt->catalogs = NULL;
12886 xmlInitNodeInfoSeq(&ctxt->node_seq);
12887
12888 if (ctxt->attsDefault != NULL) {
12889 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12890 ctxt->attsDefault = NULL;
12891 }
12892 if (ctxt->attsSpecial != NULL) {
12893 xmlHashFree(ctxt->attsSpecial, NULL);
12894 ctxt->attsSpecial = NULL;
12895 }
12896
Daniel Veillard4432df22003-09-28 18:58:27 +000012897#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012898 if (ctxt->catalogs != NULL)
12899 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012900#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012901 if (ctxt->lastError.code != XML_ERR_OK)
12902 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012903}
12904
12905/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012906 * xmlCtxtResetPush:
12907 * @ctxt: an XML parser context
12908 * @chunk: a pointer to an array of chars
12909 * @size: number of chars in the array
12910 * @filename: an optional file name or URI
12911 * @encoding: the document encoding, or NULL
12912 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012913 * Reset a push parser context
12914 *
12915 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012916 */
12917int
12918xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12919 int size, const char *filename, const char *encoding)
12920{
12921 xmlParserInputPtr inputStream;
12922 xmlParserInputBufferPtr buf;
12923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12924
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012925 if (ctxt == NULL)
12926 return(1);
12927
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012928 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12930
12931 buf = xmlAllocParserInputBuffer(enc);
12932 if (buf == NULL)
12933 return(1);
12934
12935 if (ctxt == NULL) {
12936 xmlFreeParserInputBuffer(buf);
12937 return(1);
12938 }
12939
12940 xmlCtxtReset(ctxt);
12941
12942 if (ctxt->pushTab == NULL) {
12943 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12944 sizeof(xmlChar *));
12945 if (ctxt->pushTab == NULL) {
12946 xmlErrMemory(ctxt, NULL);
12947 xmlFreeParserInputBuffer(buf);
12948 return(1);
12949 }
12950 }
12951
12952 if (filename == NULL) {
12953 ctxt->directory = NULL;
12954 } else {
12955 ctxt->directory = xmlParserGetDirectory(filename);
12956 }
12957
12958 inputStream = xmlNewInputStream(ctxt);
12959 if (inputStream == NULL) {
12960 xmlFreeParserInputBuffer(buf);
12961 return(1);
12962 }
12963
12964 if (filename == NULL)
12965 inputStream->filename = NULL;
12966 else
12967 inputStream->filename = (char *)
12968 xmlCanonicPath((const xmlChar *) filename);
12969 inputStream->buf = buf;
12970 inputStream->base = inputStream->buf->buffer->content;
12971 inputStream->cur = inputStream->buf->buffer->content;
12972 inputStream->end =
12973 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12974
12975 inputPush(ctxt, inputStream);
12976
12977 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12978 (ctxt->input->buf != NULL)) {
12979 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12980 int cur = ctxt->input->cur - ctxt->input->base;
12981
12982 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12983
12984 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12985 ctxt->input->cur = ctxt->input->base + cur;
12986 ctxt->input->end =
12987 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12988 use];
12989#ifdef DEBUG_PUSH
12990 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12991#endif
12992 }
12993
12994 if (encoding != NULL) {
12995 xmlCharEncodingHandlerPtr hdlr;
12996
12997 hdlr = xmlFindCharEncodingHandler(encoding);
12998 if (hdlr != NULL) {
12999 xmlSwitchToEncoding(ctxt, hdlr);
13000 } else {
13001 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13002 "Unsupported encoding %s\n", BAD_CAST encoding);
13003 }
13004 } else if (enc != XML_CHAR_ENCODING_NONE) {
13005 xmlSwitchEncoding(ctxt, enc);
13006 }
13007
13008 return(0);
13009}
13010
13011/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013012 * xmlCtxtUseOptions:
13013 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013014 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013015 *
13016 * Applies the options to the parser context
13017 *
13018 * Returns 0 in case of success, the set of unknown or unimplemented options
13019 * in case of error.
13020 */
13021int
13022xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13023{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013024 if (ctxt == NULL)
13025 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013026 if (options & XML_PARSE_RECOVER) {
13027 ctxt->recovery = 1;
13028 options -= XML_PARSE_RECOVER;
13029 } else
13030 ctxt->recovery = 0;
13031 if (options & XML_PARSE_DTDLOAD) {
13032 ctxt->loadsubset = XML_DETECT_IDS;
13033 options -= XML_PARSE_DTDLOAD;
13034 } else
13035 ctxt->loadsubset = 0;
13036 if (options & XML_PARSE_DTDATTR) {
13037 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13038 options -= XML_PARSE_DTDATTR;
13039 }
13040 if (options & XML_PARSE_NOENT) {
13041 ctxt->replaceEntities = 1;
13042 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13043 options -= XML_PARSE_NOENT;
13044 } else
13045 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013046 if (options & XML_PARSE_PEDANTIC) {
13047 ctxt->pedantic = 1;
13048 options -= XML_PARSE_PEDANTIC;
13049 } else
13050 ctxt->pedantic = 0;
13051 if (options & XML_PARSE_NOBLANKS) {
13052 ctxt->keepBlanks = 0;
13053 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13054 options -= XML_PARSE_NOBLANKS;
13055 } else
13056 ctxt->keepBlanks = 1;
13057 if (options & XML_PARSE_DTDVALID) {
13058 ctxt->validate = 1;
13059 if (options & XML_PARSE_NOWARNING)
13060 ctxt->vctxt.warning = NULL;
13061 if (options & XML_PARSE_NOERROR)
13062 ctxt->vctxt.error = NULL;
13063 options -= XML_PARSE_DTDVALID;
13064 } else
13065 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013066 if (options & XML_PARSE_NOWARNING) {
13067 ctxt->sax->warning = NULL;
13068 options -= XML_PARSE_NOWARNING;
13069 }
13070 if (options & XML_PARSE_NOERROR) {
13071 ctxt->sax->error = NULL;
13072 ctxt->sax->fatalError = NULL;
13073 options -= XML_PARSE_NOERROR;
13074 }
Daniel Veillard81273902003-09-30 00:43:48 +000013075#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013076 if (options & XML_PARSE_SAX1) {
13077 ctxt->sax->startElement = xmlSAX2StartElement;
13078 ctxt->sax->endElement = xmlSAX2EndElement;
13079 ctxt->sax->startElementNs = NULL;
13080 ctxt->sax->endElementNs = NULL;
13081 ctxt->sax->initialized = 1;
13082 options -= XML_PARSE_SAX1;
13083 }
Daniel Veillard81273902003-09-30 00:43:48 +000013084#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013085 if (options & XML_PARSE_NODICT) {
13086 ctxt->dictNames = 0;
13087 options -= XML_PARSE_NODICT;
13088 } else {
13089 ctxt->dictNames = 1;
13090 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013091 if (options & XML_PARSE_NOCDATA) {
13092 ctxt->sax->cdataBlock = NULL;
13093 options -= XML_PARSE_NOCDATA;
13094 }
13095 if (options & XML_PARSE_NSCLEAN) {
13096 ctxt->options |= XML_PARSE_NSCLEAN;
13097 options -= XML_PARSE_NSCLEAN;
13098 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013099 if (options & XML_PARSE_NONET) {
13100 ctxt->options |= XML_PARSE_NONET;
13101 options -= XML_PARSE_NONET;
13102 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013103 if (options & XML_PARSE_COMPACT) {
13104 ctxt->options |= XML_PARSE_COMPACT;
13105 options -= XML_PARSE_COMPACT;
13106 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013107 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013108 return (options);
13109}
13110
13111/**
13112 * xmlDoRead:
13113 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013114 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013115 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013116 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013117 * @reuse: keep the context for reuse
13118 *
13119 * Common front-end for the xmlRead functions
13120 *
13121 * Returns the resulting document tree or NULL
13122 */
13123static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013124xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13125 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013126{
13127 xmlDocPtr ret;
13128
13129 xmlCtxtUseOptions(ctxt, options);
13130 if (encoding != NULL) {
13131 xmlCharEncodingHandlerPtr hdlr;
13132
13133 hdlr = xmlFindCharEncodingHandler(encoding);
13134 if (hdlr != NULL)
13135 xmlSwitchToEncoding(ctxt, hdlr);
13136 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013137 if ((URL != NULL) && (ctxt->input != NULL) &&
13138 (ctxt->input->filename == NULL))
13139 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013140 xmlParseDocument(ctxt);
13141 if ((ctxt->wellFormed) || ctxt->recovery)
13142 ret = ctxt->myDoc;
13143 else {
13144 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013145 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013146 xmlFreeDoc(ctxt->myDoc);
13147 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013148 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013149 ctxt->myDoc = NULL;
13150 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013151 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013152 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013153
13154 return (ret);
13155}
13156
13157/**
13158 * xmlReadDoc:
13159 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013160 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013161 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013162 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013163 *
13164 * parse an XML in-memory document and build a tree.
13165 *
13166 * Returns the resulting document tree
13167 */
13168xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013169xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013170{
13171 xmlParserCtxtPtr ctxt;
13172
13173 if (cur == NULL)
13174 return (NULL);
13175
13176 ctxt = xmlCreateDocParserCtxt(cur);
13177 if (ctxt == NULL)
13178 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013179 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013180}
13181
13182/**
13183 * xmlReadFile:
13184 * @filename: a file or URL
13185 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013186 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013187 *
13188 * parse an XML file from the filesystem or the network.
13189 *
13190 * Returns the resulting document tree
13191 */
13192xmlDocPtr
13193xmlReadFile(const char *filename, const char *encoding, int options)
13194{
13195 xmlParserCtxtPtr ctxt;
13196
Daniel Veillard61b93382003-11-03 14:28:31 +000013197 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013198 if (ctxt == NULL)
13199 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013200 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013201}
13202
13203/**
13204 * xmlReadMemory:
13205 * @buffer: a pointer to a char array
13206 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013207 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013208 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013209 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013210 *
13211 * parse an XML in-memory document and build a tree.
13212 *
13213 * Returns the resulting document tree
13214 */
13215xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013216xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013217{
13218 xmlParserCtxtPtr ctxt;
13219
13220 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13221 if (ctxt == NULL)
13222 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013223 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013224}
13225
13226/**
13227 * xmlReadFd:
13228 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013229 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013230 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013231 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013232 *
13233 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013234 * NOTE that the file descriptor will not be closed when the
13235 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013236 *
13237 * Returns the resulting document tree
13238 */
13239xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013240xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013241{
13242 xmlParserCtxtPtr ctxt;
13243 xmlParserInputBufferPtr input;
13244 xmlParserInputPtr stream;
13245
13246 if (fd < 0)
13247 return (NULL);
13248
13249 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13250 if (input == NULL)
13251 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013252 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013253 ctxt = xmlNewParserCtxt();
13254 if (ctxt == NULL) {
13255 xmlFreeParserInputBuffer(input);
13256 return (NULL);
13257 }
13258 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13259 if (stream == NULL) {
13260 xmlFreeParserInputBuffer(input);
13261 xmlFreeParserCtxt(ctxt);
13262 return (NULL);
13263 }
13264 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013265 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013266}
13267
13268/**
13269 * xmlReadIO:
13270 * @ioread: an I/O read function
13271 * @ioclose: an I/O close function
13272 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013273 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013274 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013275 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013276 *
13277 * parse an XML document from I/O functions and source and build a tree.
13278 *
13279 * Returns the resulting document tree
13280 */
13281xmlDocPtr
13282xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013283 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013284{
13285 xmlParserCtxtPtr ctxt;
13286 xmlParserInputBufferPtr input;
13287 xmlParserInputPtr stream;
13288
13289 if (ioread == NULL)
13290 return (NULL);
13291
13292 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13293 XML_CHAR_ENCODING_NONE);
13294 if (input == NULL)
13295 return (NULL);
13296 ctxt = xmlNewParserCtxt();
13297 if (ctxt == NULL) {
13298 xmlFreeParserInputBuffer(input);
13299 return (NULL);
13300 }
13301 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13302 if (stream == NULL) {
13303 xmlFreeParserInputBuffer(input);
13304 xmlFreeParserCtxt(ctxt);
13305 return (NULL);
13306 }
13307 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013308 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013309}
13310
13311/**
13312 * xmlCtxtReadDoc:
13313 * @ctxt: an XML parser context
13314 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013315 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013316 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013317 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013318 *
13319 * parse an XML in-memory document and build a tree.
13320 * This reuses the existing @ctxt parser context
13321 *
13322 * Returns the resulting document tree
13323 */
13324xmlDocPtr
13325xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013326 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327{
13328 xmlParserInputPtr stream;
13329
13330 if (cur == NULL)
13331 return (NULL);
13332 if (ctxt == NULL)
13333 return (NULL);
13334
13335 xmlCtxtReset(ctxt);
13336
13337 stream = xmlNewStringInputStream(ctxt, cur);
13338 if (stream == NULL) {
13339 return (NULL);
13340 }
13341 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013342 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013343}
13344
13345/**
13346 * xmlCtxtReadFile:
13347 * @ctxt: an XML parser context
13348 * @filename: a file or URL
13349 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013350 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013351 *
13352 * parse an XML file from the filesystem or the network.
13353 * This reuses the existing @ctxt parser context
13354 *
13355 * Returns the resulting document tree
13356 */
13357xmlDocPtr
13358xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13359 const char *encoding, int options)
13360{
13361 xmlParserInputPtr stream;
13362
13363 if (filename == NULL)
13364 return (NULL);
13365 if (ctxt == NULL)
13366 return (NULL);
13367
13368 xmlCtxtReset(ctxt);
13369
Daniel Veillard29614c72004-11-26 10:47:26 +000013370 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013371 if (stream == NULL) {
13372 return (NULL);
13373 }
13374 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013375 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013376}
13377
13378/**
13379 * xmlCtxtReadMemory:
13380 * @ctxt: an XML parser context
13381 * @buffer: a pointer to a char array
13382 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013383 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013384 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013385 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013386 *
13387 * parse an XML in-memory document and build a tree.
13388 * This reuses the existing @ctxt parser context
13389 *
13390 * Returns the resulting document tree
13391 */
13392xmlDocPtr
13393xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013394 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013395{
13396 xmlParserInputBufferPtr input;
13397 xmlParserInputPtr stream;
13398
13399 if (ctxt == NULL)
13400 return (NULL);
13401 if (buffer == NULL)
13402 return (NULL);
13403
13404 xmlCtxtReset(ctxt);
13405
13406 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13407 if (input == NULL) {
13408 return(NULL);
13409 }
13410
13411 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13412 if (stream == NULL) {
13413 xmlFreeParserInputBuffer(input);
13414 return(NULL);
13415 }
13416
13417 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013418 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013419}
13420
13421/**
13422 * xmlCtxtReadFd:
13423 * @ctxt: an XML parser context
13424 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013425 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013426 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013427 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013428 *
13429 * parse an XML from a file descriptor and build a tree.
13430 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013431 * NOTE that the file descriptor will not be closed when the
13432 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013433 *
13434 * Returns the resulting document tree
13435 */
13436xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013437xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13438 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013439{
13440 xmlParserInputBufferPtr input;
13441 xmlParserInputPtr stream;
13442
13443 if (fd < 0)
13444 return (NULL);
13445 if (ctxt == NULL)
13446 return (NULL);
13447
13448 xmlCtxtReset(ctxt);
13449
13450
13451 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13452 if (input == NULL)
13453 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013454 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013455 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13456 if (stream == NULL) {
13457 xmlFreeParserInputBuffer(input);
13458 return (NULL);
13459 }
13460 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013461 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013462}
13463
13464/**
13465 * xmlCtxtReadIO:
13466 * @ctxt: an XML parser context
13467 * @ioread: an I/O read function
13468 * @ioclose: an I/O close function
13469 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013470 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013471 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013472 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013473 *
13474 * parse an XML document from I/O functions and source and build a tree.
13475 * This reuses the existing @ctxt parser context
13476 *
13477 * Returns the resulting document tree
13478 */
13479xmlDocPtr
13480xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13481 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013482 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013483 const char *encoding, int options)
13484{
13485 xmlParserInputBufferPtr input;
13486 xmlParserInputPtr stream;
13487
13488 if (ioread == NULL)
13489 return (NULL);
13490 if (ctxt == NULL)
13491 return (NULL);
13492
13493 xmlCtxtReset(ctxt);
13494
13495 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13496 XML_CHAR_ENCODING_NONE);
13497 if (input == NULL)
13498 return (NULL);
13499 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13500 if (stream == NULL) {
13501 xmlFreeParserInputBuffer(input);
13502 return (NULL);
13503 }
13504 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013505 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013506}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013507
13508#define bottom_parser
13509#include "elfgcchack.h"