blob: b405164c2d51b55666b0010734188b08eadd741d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000083/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +000084 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +000085 *
86 * arbitrary depth limit for the XML documents that we allow to
87 * process. This is not a limitation of the parser but a safety
88 * boundary feature.
89 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000090unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
Daniel Veillard0fb18932003-09-07 09:14:37 +000092#define SAX2 1
93
Daniel Veillard21a0f912001-02-25 19:54:14 +000094#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000095#define XML_PARSER_BUFFER_SIZE 100
96
Daniel Veillard5997aca2002-03-18 18:36:20 +000097#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
Owen Taylor3473f882001-02-23 17:55:21 +000099/*
Owen Taylor3473f882001-02-23 17:55:21 +0000100 * List of XML prefixed PI allowed by W3C specs
101 */
102
Daniel Veillardb44025c2001-10-11 22:55:55 +0000103static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000104 "xml-stylesheet",
105 NULL
106};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000610/************************************************************************
611 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000612 * Library wide options *
613 * *
614 ************************************************************************/
615
616/**
617 * xmlHasFeature:
618 * @feature: the feature to be examined
619 *
620 * Examines if the library has been compiled with a given feature.
621 *
 * Returns zero (0) if the feature does not exist or an unknown
 * feature is requested, non-zero otherwise.
625 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1462 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
1467 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1468 * This should be used internally by the parser
1469 * only to compare to ASCII values otherwise it would break when
1470 * running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
/*
 * Byte-level accessors on the current input of the in-scope `ctxt`.
 * RAW and CUR both read the byte under the cursor, NXT(val) reads the byte
 * `val` positions after it, and CUR_PTR is the cursor itself (an lvalue).
 * Every expansion is fully parenthesized so that it behaves as a single
 * operand whatever expression surrounds it.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
1496
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at `s` are exactly
 * c1..cn.  Bytes are compared as unsigned char.  `s` is evaluated several
 * times, so it must be free of side effects.  Arguments are parenthesized
 * so that compound expressions (e.g. `cond ? a : b`) are cast as a whole,
 * and the cast keeps the pointed-to data const since it is only read.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): move the cursor of the current input `val` bytes forward,
 * adding `val` to both ctxt->nbChars and the column counter (no newline
 * bookkeeping is done).  Afterwards a '%' under the cursor is handed to
 * xmlParserHandlePEReference(), and when the cursor sits on a 0 byte and
 * xmlParserInputGrow() returns <= 0 the input is popped.
 * `val` is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1522
/*
 * SKIPL(val): like SKIP(val) but walks the `val` bytes one at a time so
 * that newlines update the line/column counters: a '\n' bumps the line and
 * resets the column to 1, any other byte bumps the column.  After the walk
 * a '%' under the cursor is handed to xmlParserHandlePEReference(), and a
 * 0 byte with xmlParserInputGrow() returning <= 0 pops the input.
 * `val` is parenthesized in the loop bound so compound expressions compare
 * as a whole; it is re-evaluated on every iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* SKIP_BLANKS: skip blanks at the cursor via xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance to the next character via xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the character and column
 * counters (no newline bookkeeping), and request more input when the new
 * current byte is 0.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 { \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
        ctxt->input->col++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }

/*
 * NEXTL(l): step over the current character, which is `l` bytes long.
 * A '\n' under the cursor bumps the line and resets the column to 1,
 * anything else bumps the column by one.  A '%' found at the new position
 * is handed to xmlParserHandlePEReference().
 * NOTE(review): unlike SKIP/NEXT1 this does not touch ctxt->nbChars;
 * whether that is intentional cannot be told from here.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)
1580
/*
 * CUR_CHAR(l): current character of the parser input via xmlCurrentChar();
 * `l` must be an int lvalue and receives the length reported by that call.
 * CUR_SCHAR(s, l): same for the string `s`, via xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append character `v` to buffer `b` at index `i`
 * and advance `i`.  When `l` is 1 the value is stored as a single xmlChar;
 * otherwise xmlCopyCharMultiByte() writes it and `i` advances by its
 * return value.  All arguments are parenthesized so compound expressions
 * are tested/indexed as a whole.
 * NOTE: expands to a bare if/else without trailing ';'.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the companion variable `buffer_size` and
 * reallocates `buffer` to that many xmlChars.  On allocation failure it
 * jumps to the `mem_error` label of the enclosing function, leaving
 * `buffer` pointing at the old block (with `buffer_size` already doubled).
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define growBuffer(buffer) { \
    xmlChar *growBuffer_tmp; \
    buffer##_size *= 2; \
    growBuffer_tmp = (xmlChar *) \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar)); \
    if (growBuffer_tmp != NULL) \
        buffer = growBuffer_tmp; \
    else \
        goto mem_error; \
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
2332/************************************************************************
2333 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002334 * Commodity functions, cleanup needed ? *
2335 * *
2336 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406/************************************************************************
2407 * *
2408 * Extra stuff for namespace support *
2409 * Relates to http://www.w3.org/TR/WD-xml-names *
2410 * *
2411 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002499 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002500 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002512 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002513 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002515 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002516 }
Owen Taylor3473f882001-02-23 17:55:21 +00002517 len = 0;
2518
Daniel Veillardbb284f42002-10-16 18:02:47 +00002519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002532 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002533 }
2534 }
2535 cur++;
2536
Owen Taylor3473f882001-02-23 17:55:21 +00002537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002550 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 xmlChar *tmp;
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002560 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 return(NULL);
2565 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002566 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
2584/************************************************************************
2585 * *
2586 * The parser itself *
2587 * Relates to http://www.w3.org/TR/REC-xml *
2588 * *
2589 ************************************************************************/
2590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002593 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002594
Owen Taylor3473f882001-02-23 17:55:21 +00002595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002606 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002612xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002613 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002614 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002615 int count = 0;
2616
2617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002633 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002636 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002641 return(ret);
2642 }
2643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002670 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 }
William M. Brack76e95df2003-10-18 16:20:14 +00002672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 /* success */
2674 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002675 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002682 }
2683 return ret;
2684}
2685
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002686static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002713 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 NEXTL(l);
2715 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Daniel Veillard96688262005-08-23 18:14:12 +00002717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002734 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002736 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * is updated to the current location in the string.
2738 */
2739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002740static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002748 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002749 (c != ':')) {
2750 return(NULL);
2751 }
2752
William M. Brack871611b2003-10-18 04:53:14 +00002753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002776 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002785 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 return(NULL);
2790 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002791 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
William M. Brack871611b2003-10-18 04:53:14 +00002829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 xmlChar *tmp;
2866
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002869 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 return(NULL);
2874 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002875 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002922 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
William M. Brack871611b2003-10-18 04:53:14 +00002944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 xmlChar *tmp;
2948
Owen Taylor3473f882001-02-23 17:55:21 +00002949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002990 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002991 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 }
2997 if (name != NULL)
2998 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002999 if (*cur == 0)
3000 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003033 * @attlen: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003034 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003038 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003042static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 int len = 0;
3047 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003071
3072 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003073 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003074 */
3075 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 while ((NXT(0) != limit) && /* checked */
3077 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003079 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003207 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003213 } else
3214 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003215 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003217
3218mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003219 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003221}
3222
3223/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003224 * xmlParseAttValue:
3225 * @ctxt: an XML parser context
3226 *
3227 * parse a value for an attribute
3228 * Note: the parser won't do substitution of entities here, this
3229 * will be handled later in xmlStringGetNodeList
3230 *
3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232 * "'" ([^<&'] | Reference)* "'"
3233 *
3234 * 3.3.3 Attribute-Value Normalization:
3235 * Before the value of an attribute is passed to the application or
3236 * checked for validity, the XML processor must normalize it as follows:
3237 * - a character reference is processed by appending the referenced
3238 * character to the attribute value
3239 * - an entity reference is processed by recursively processing the
3240 * replacement text of the entity
3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242 * appending #x20 to the normalized value, except that only a single
3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3244 * parsed entity or the literal entity value of an internal parsed entity
3245 * - other characters are processed by appending them to the normalized value
3246 * If the declared value is not CDATA, then the XML processor must further
3247 * process the normalized attribute value by discarding any leading and
3248 * trailing space (#x20) characters, and by replacing sequences of space
3249 * (#x20) characters by a single space (#x20) character.
3250 * All attributes for which no declaration has been read should be treated
3251 * by a non-validating parser as if declared CDATA.
3252 *
3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3254 */
3255
3256
3257xmlChar *
3258xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003261}
3262
3263/**
Owen Taylor3473f882001-02-23 17:55:21 +00003264 * xmlParseSystemLiteral:
3265 * @ctxt: an XML parser context
3266 *
3267 * parse an XML Literal
3268 *
3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3270 *
3271 * Returns the SystemLiteral parsed or NULL
3272 */
3273
3274xmlChar *
3275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276 xmlChar *buf = NULL;
3277 int len = 0;
3278 int size = XML_PARSER_BUFFER_SIZE;
3279 int cur, l;
3280 xmlChar stop;
3281 int state = ctxt->instate;
3282 int count = 0;
3283
3284 SHRINK;
3285 if (RAW == '"') {
3286 NEXT;
3287 stop = '"';
3288 } else if (RAW == '\'') {
3289 NEXT;
3290 stop = '\'';
3291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003293 return(NULL);
3294 }
3295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003297 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003299 return(NULL);
3300 }
3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3302 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003305 xmlChar *tmp;
3306
Owen Taylor3473f882001-02-23 17:55:21 +00003307 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3309 if (tmp == NULL) {
3310 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003312 ctxt->instate = (xmlParserInputState) state;
3313 return(NULL);
3314 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003315 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003316 }
3317 count++;
3318 if (count > 50) {
3319 GROW;
3320 count = 0;
3321 }
3322 COPY_BUF(l,buf,len,cur);
3323 NEXTL(l);
3324 cur = CUR_CHAR(l);
3325 if (cur == 0) {
3326 GROW;
3327 SHRINK;
3328 cur = CUR_CHAR(l);
3329 }
3330 }
3331 buf[len] = 0;
3332 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003333 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 } else {
3336 NEXT;
3337 }
3338 return(buf);
3339}
3340
3341/**
3342 * xmlParsePubidLiteral:
3343 * @ctxt: an XML parser context
3344 *
3345 * parse an XML public literal
3346 *
3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3348 *
3349 * Returns the PubidLiteral parsed or NULL.
3350 */
3351
3352xmlChar *
3353xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354 xmlChar *buf = NULL;
3355 int len = 0;
3356 int size = XML_PARSER_BUFFER_SIZE;
3357 xmlChar cur;
3358 xmlChar stop;
3359 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003360 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003361
3362 SHRINK;
3363 if (RAW == '"') {
3364 NEXT;
3365 stop = '"';
3366 } else if (RAW == '\'') {
3367 NEXT;
3368 stop = '\'';
3369 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003371 return(NULL);
3372 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003374 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 xmlChar *tmp;
3383
Owen Taylor3473f882001-02-23 17:55:21 +00003384 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3386 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003388 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003389 return(NULL);
3390 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003391 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 }
3393 buf[len++] = cur;
3394 count++;
3395 if (count > 50) {
3396 GROW;
3397 count = 0;
3398 }
3399 NEXT;
3400 cur = CUR;
3401 if (cur == 0) {
3402 GROW;
3403 SHRINK;
3404 cur = CUR;
3405 }
3406 }
3407 buf[len] = 0;
3408 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 } else {
3411 NEXT;
3412 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003413 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003414 return(buf);
3415}
3416
/*
 * Forward declaration: xmlParseCharData() below calls this function,
 * which is defined after it.
 */
void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value.  An entry is non-zero (it holds the byte itself)
 * for TAB (0x09) and for 0x20..0x7F except '&', '<' and ']'; every other
 * entry, including LF, CR, NUL and all bytes >= 0x80, is 0.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3456
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * slices of it directly to the SAX callbacks.  Whatever the fast path
 * cannot handle (and everything when @cdata is non-zero) is passed on to
 * xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot: refreshed after each chunk is delivered and
     * written back to the input before falling through to the complex
     * parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a run made only of spaces and line feeds. */
            while (*in == 0x20) in++;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only 0x20/0xA were seen between the cursor and this '<':
                 * deliver that run (if any) and return.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * NOTE(review): the last argument is 1 here and 0
                         * at the other call sites -- presumably it tells
                         * areBlanks() the run is already known to be blank;
                         * confirm against its definition.
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * Flip the current xml:space slot from -1 to
                             * -2 (sentinel meanings are defined elsewhere).
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Advance over every byte flagged in test_char_data, counting
             * columns in a local and storing the total afterwards.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is an error here; a lone ']' is ordinary text. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what was scanned since the cursor, without copying. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The chunk starts with a blank and the handlers
                     * differ: let areBlanks() decide which one gets it.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR immediately followed by LF: move the cursor onto the
                 * LF (dropping the CR), count the new line and go on.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* Lone CR: step back and leave it for the code below. */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            in = ctxt->input->cur;
            /* Stay on the fast path while the next byte is TAB or 0x20..0x7F. */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Restore the last recorded position and use the general parser. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
3609
Daniel Veillard01c13b52002-12-10 15:19:08 +00003610/**
3611 * xmlParseCharDataComplex:
3612 * @ctxt: an XML parser context
3613 * @cdata: int indicating whether we are within a CDATA section
3614 *
3615 * parse a CharData section.this is the fallback function
3616 * of xmlParseCharData() when the parsing requires handling
3617 * of non-ASCII characters.
3618 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003619void
3620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3622 int nbchar = 0;
3623 int cur, l;
3624 int count = 0;
3625
3626 SHRINK;
3627 GROW;
3628 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003629 while ((cur != '<') && /* checked */
3630 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if ((cur == ']') && (NXT(1) == ']') &&
3633 (NXT(2) == '>')) {
3634 if (cdata) break;
3635 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003637 }
3638 }
3639 COPY_BUF(l,buf,nbchar,cur);
3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003641 buf[nbchar] = 0;
3642
Owen Taylor3473f882001-02-23 17:55:21 +00003643 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003644 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003645 */
3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003647 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (ctxt->sax->ignorableWhitespace != NULL)
3649 ctxt->sax->ignorableWhitespace(ctxt->userData,
3650 buf, nbchar);
3651 } else {
3652 if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003654 if ((ctxt->sax->characters !=
3655 ctxt->sax->ignorableWhitespace) &&
3656 (*ctxt->space == -1))
3657 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 }
3660 nbchar = 0;
3661 }
3662 count++;
3663 if (count > 50) {
3664 GROW;
3665 count = 0;
3666 }
3667 NEXTL(l);
3668 cur = CUR_CHAR(l);
3669 }
3670 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003671 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003673 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003674 */
3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003676 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003677 if (ctxt->sax->ignorableWhitespace != NULL)
3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683 (*ctxt->space == -1))
3684 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 }
3687 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003688 if ((cur != 0) && (!IS_CHAR(cur))) {
3689 /* Generate the error and skip the offending character */
3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691 "PCDATA invalid Char value %d\n",
3692 cur);
3693 NEXTL(l);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695}
3696
3697/**
3698 * xmlParseExternalID:
3699 * @ctxt: an XML parser context
3700 * @publicID: a xmlChar** receiving PubidLiteral
3701 * @strict: indicate whether we should restrict parsing to only
3702 * production [75], see NOTE below
3703 *
3704 * Parse an External ID or a Public ID
3705 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003706 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003707 * 'PUBLIC' S PubidLiteral S SystemLiteral
3708 *
3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3711 *
3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3713 *
3714 * Returns the function returns SystemLiteral and in the second
3715 * case publicID receives PubidLiteral, is strict is off
3716 * it is possible to return NULL and have publicID set.
3717 */
3718
3719xmlChar *
3720xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721 xmlChar *URI = NULL;
3722
3723 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003724
3725 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003727 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 SKIP_BLANKS;
3733 URI = xmlParseSystemLiteral(ctxt);
3734 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003741 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 *publicID = xmlParsePubidLiteral(ctxt);
3745 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 if (strict) {
3749 /*
3750 * We don't handle [83] so "S SystemLiteral" is required.
3751 */
William M. Brack76e95df2003-10-18 16:20:14 +00003752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003754 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 } else {
3757 /*
3758 * We handle [83] so we return immediately, if
3759 * "S SystemLiteral" is not detected. From a purely parsing
3760 * point of view that's a nice mess.
3761 */
3762 const xmlChar *ptr;
3763 GROW;
3764
3765 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767
William M. Brack76e95df2003-10-18 16:20:14 +00003768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3770 }
3771 SKIP_BLANKS;
3772 URI = xmlParseSystemLiteral(ctxt);
3773 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 }
3777 return(URI);
3778}
3779
3780/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003782 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003783 * @buf: the already parsed part of the buffer
3784 * @len: number of bytes filles in the buffer
3785 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003786 *
3787 * Skip an XML (SGML) comment <!-- .... -->
3788 * The spec says that "For compatibility, the string "--" (double-hyphen)
3789 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003790 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003791 *
3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3793 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003794static void
3795xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 int q, ql;
3797 int r, rl;
3798 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 xmlParserInputPtr input = ctxt->input;
3800 int count = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003803 len = 0;
3804 size = XML_PARSER_BUFFER_SIZE;
3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3806 if (buf == NULL) {
3807 xmlErrMemory(ctxt, NULL);
3808 return;
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003811 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003812 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003813 if (q == 0)
3814 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003815 NEXTL(ql);
3816 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003817 if (r == 0)
3818 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 NEXTL(rl);
3820 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003821 if (cur == 0)
3822 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003823 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003824 ((cur != '>') ||
3825 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003826 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003827 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003828 }
3829 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003830 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003832 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3833 if (new_buf == NULL) {
3834 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003835 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return;
3837 }
William M. Bracka3215c72004-07-31 16:24:01 +00003838 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003839 }
3840 COPY_BUF(ql,buf,len,q);
3841 q = r;
3842 ql = rl;
3843 r = cur;
3844 rl = l;
3845
3846 count++;
3847 if (count > 50) {
3848 GROW;
3849 count = 0;
3850 }
3851 NEXTL(l);
3852 cur = CUR_CHAR(l);
3853 if (cur == 0) {
3854 SHRINK;
3855 GROW;
3856 cur = CUR_CHAR(l);
3857 }
3858 }
3859 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003860 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003862 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 xmlFree(buf);
3864 } else {
3865 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003866 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3867 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 NEXT;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3871 (!ctxt->disableSAX))
3872 ctxt->sax->comment(ctxt->userData, buf);
3873 xmlFree(buf);
3874 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003875 return;
3876not_terminated:
3877 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3878 "Comment not terminated\n", NULL);
3879 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003880}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003881/**
3882 * xmlParseComment:
3883 * @ctxt: an XML parser context
3884 *
3885 * Skip an XML (SGML) comment <!-- .... -->
3886 * The spec says that "For compatibility, the string "--" (double-hyphen)
3887 * must not occur within comments. "
3888 *
3889 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3890 */
3891void
3892xmlParseComment(xmlParserCtxtPtr ctxt) {
3893 xmlChar *buf = NULL;
3894 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003895 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003896 xmlParserInputState state;
3897 const xmlChar *in;
3898 int nbchar = 0, ccol;
3899
3900 /*
3901 * Check that there is a comment right here.
3902 */
3903 if ((RAW != '<') || (NXT(1) != '!') ||
3904 (NXT(2) != '-') || (NXT(3) != '-')) return;
3905
3906 state = ctxt->instate;
3907 ctxt->instate = XML_PARSER_COMMENT;
3908 SKIP(4);
3909 SHRINK;
3910 GROW;
3911
3912 /*
3913 * Accelerated common case where input don't need to be
3914 * modified before passing it to the handler.
3915 */
3916 in = ctxt->input->cur;
3917 do {
3918 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003919 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003920 ctxt->input->line++; ctxt->input->col = 1;
3921 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003922 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003923 }
3924get_more:
3925 ccol = ctxt->input->col;
3926 while (((*in > '-') && (*in <= 0x7F)) ||
3927 ((*in >= 0x20) && (*in < '-')) ||
3928 (*in == 0x09)) {
3929 in++;
3930 ccol++;
3931 }
3932 ctxt->input->col = ccol;
3933 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003934 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003935 ctxt->input->line++; ctxt->input->col = 1;
3936 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003937 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003938 goto get_more;
3939 }
3940 nbchar = in - ctxt->input->cur;
3941 /*
3942 * save current set of data
3943 */
3944 if (nbchar > 0) {
3945 if ((ctxt->sax != NULL) &&
3946 (ctxt->sax->comment != NULL)) {
3947 if (buf == NULL) {
3948 if ((*in == '-') && (in[1] == '-'))
3949 size = nbchar + 1;
3950 else
3951 size = XML_PARSER_BUFFER_SIZE + nbchar;
3952 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3953 if (buf == NULL) {
3954 xmlErrMemory(ctxt, NULL);
3955 ctxt->instate = state;
3956 return;
3957 }
3958 len = 0;
3959 } else if (len + nbchar + 1 >= size) {
3960 xmlChar *new_buf;
3961 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3962 new_buf = (xmlChar *) xmlRealloc(buf,
3963 size * sizeof(xmlChar));
3964 if (new_buf == NULL) {
3965 xmlFree (buf);
3966 xmlErrMemory(ctxt, NULL);
3967 ctxt->instate = state;
3968 return;
3969 }
3970 buf = new_buf;
3971 }
3972 memcpy(&buf[len], ctxt->input->cur, nbchar);
3973 len += nbchar;
3974 buf[len] = 0;
3975 }
3976 }
3977 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003978 if (*in == 0xA) {
3979 in++;
3980 ctxt->input->line++; ctxt->input->col = 1;
3981 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003982 if (*in == 0xD) {
3983 in++;
3984 if (*in == 0xA) {
3985 ctxt->input->cur = in;
3986 in++;
3987 ctxt->input->line++; ctxt->input->col = 1;
3988 continue; /* while */
3989 }
3990 in--;
3991 }
3992 SHRINK;
3993 GROW;
3994 in = ctxt->input->cur;
3995 if (*in == '-') {
3996 if (in[1] == '-') {
3997 if (in[2] == '>') {
3998 SKIP(3);
3999 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4000 (!ctxt->disableSAX)) {
4001 if (buf != NULL)
4002 ctxt->sax->comment(ctxt->userData, buf);
4003 else
4004 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4005 }
4006 if (buf != NULL)
4007 xmlFree(buf);
4008 ctxt->instate = state;
4009 return;
4010 }
4011 if (buf != NULL)
4012 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4013 "Comment not terminated \n<!--%.50s\n",
4014 buf);
4015 else
4016 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4017 "Comment not terminated \n", NULL);
4018 in++;
4019 ctxt->input->col++;
4020 }
4021 in++;
4022 ctxt->input->col++;
4023 goto get_more;
4024 }
4025 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4026 xmlParseCommentComplex(ctxt, buf, len, size);
4027 ctxt->instate = state;
4028 return;
4029}
4030
Owen Taylor3473f882001-02-23 17:55:21 +00004031
4032/**
4033 * xmlParsePITarget:
4034 * @ctxt: an XML parser context
4035 *
4036 * parse the name of a PI
4037 *
4038 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4039 *
4040 * Returns the PITarget name or NULL
4041 */
4042
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004043const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004044xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004045 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004046
4047 name = xmlParseName(ctxt);
4048 if ((name != NULL) &&
4049 ((name[0] == 'x') || (name[0] == 'X')) &&
4050 ((name[1] == 'm') || (name[1] == 'M')) &&
4051 ((name[2] == 'l') || (name[2] == 'L'))) {
4052 int i;
4053 if ((name[0] == 'x') && (name[1] == 'm') &&
4054 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004055 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004056 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004057 return(name);
4058 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004059 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 return(name);
4061 }
4062 for (i = 0;;i++) {
4063 if (xmlW3CPIs[i] == NULL) break;
4064 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4065 return(name);
4066 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004067 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4068 "xmlParsePITarget: invalid name prefix 'xml'\n",
4069 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
4071 return(name);
4072}
4073
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is: optional blanks, the word "catalog",
 * optional blanks, '=', optional blanks, a single or double quoted
 * value, optional blanks.  A missing '=' makes the function return
 * silently; any other deviation raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *scan = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    if (xmlStrncmp(scan, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    scan += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    if (*scan != '=')
        return;
    scan++;

    /* opening quote */
    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    quote = *scan;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    scan++;

    /* value, up to the matching quote */
    start = scan;
    while ((*scan != 0) && (*scan != quote)) {
        scan++;
    }
    if (*scan == 0)
        goto syntax_error;
    URL = xmlStrndup(start, scan - start);
    scan++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    if (*scan != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4135
Owen Taylor3473f882001-02-23 17:55:21 +00004136/**
4137 * xmlParsePI:
4138 * @ctxt: an XML parser context
4139 *
4140 * parse an XML Processing Instruction.
4141 *
4142 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4143 *
4144 * The processing is transfered to SAX once parsed.
4145 */
4146
4147void
4148xmlParsePI(xmlParserCtxtPtr ctxt) {
4149 xmlChar *buf = NULL;
4150 int len = 0;
4151 int size = XML_PARSER_BUFFER_SIZE;
4152 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004153 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004154 xmlParserInputState state;
4155 int count = 0;
4156
4157 if ((RAW == '<') && (NXT(1) == '?')) {
4158 xmlParserInputPtr input = ctxt->input;
4159 state = ctxt->instate;
4160 ctxt->instate = XML_PARSER_PI;
4161 /*
4162 * this is a Processing Instruction.
4163 */
4164 SKIP(2);
4165 SHRINK;
4166
4167 /*
4168 * Parse the target name and check for special support like
4169 * namespace.
4170 */
4171 target = xmlParsePITarget(ctxt);
4172 if (target != NULL) {
4173 if ((RAW == '?') && (NXT(1) == '>')) {
4174 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004175 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4176 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004177 }
4178 SKIP(2);
4179
4180 /*
4181 * SAX: PI detected.
4182 */
4183 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4184 (ctxt->sax->processingInstruction != NULL))
4185 ctxt->sax->processingInstruction(ctxt->userData,
4186 target, NULL);
4187 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 return;
4189 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004190 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004191 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004192 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004193 ctxt->instate = state;
4194 return;
4195 }
4196 cur = CUR;
4197 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004198 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4199 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004200 }
4201 SKIP_BLANKS;
4202 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004203 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004204 ((cur != '?') || (NXT(1) != '>'))) {
4205 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004206 xmlChar *tmp;
4207
Owen Taylor3473f882001-02-23 17:55:21 +00004208 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004209 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4210 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004212 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004213 ctxt->instate = state;
4214 return;
4215 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004216 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004217 }
4218 count++;
4219 if (count > 50) {
4220 GROW;
4221 count = 0;
4222 }
4223 COPY_BUF(l,buf,len,cur);
4224 NEXTL(l);
4225 cur = CUR_CHAR(l);
4226 if (cur == 0) {
4227 SHRINK;
4228 GROW;
4229 cur = CUR_CHAR(l);
4230 }
4231 }
4232 buf[len] = 0;
4233 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004234 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4235 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 } else {
4237 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004238 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4239 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004240 }
4241 SKIP(2);
4242
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004243#ifdef LIBXML_CATALOG_ENABLED
4244 if (((state == XML_PARSER_MISC) ||
4245 (state == XML_PARSER_START)) &&
4246 (xmlStrEqual(target, XML_CATALOG_PI))) {
4247 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4248 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4249 (allow == XML_CATA_ALLOW_ALL))
4250 xmlParseCatalogPI(ctxt, buf);
4251 }
4252#endif
4253
4254
Owen Taylor3473f882001-02-23 17:55:21 +00004255 /*
4256 * SAX: PI detected.
4257 */
4258 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4259 (ctxt->sax->processingInstruction != NULL))
4260 ctxt->sax->processingInstruction(ctxt->userData,
4261 target, buf);
4262 }
4263 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004265 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 ctxt->instate = state;
4268 }
4269}
4270
4271/**
4272 * xmlParseNotationDecl:
4273 * @ctxt: an XML parser context
4274 *
4275 * parse a notation declaration
4276 *
4277 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4278 *
4279 * Hence there is actually 3 choices:
4280 * 'PUBLIC' S PubidLiteral
4281 * 'PUBLIC' S PubidLiteral S SystemLiteral
4282 * and 'SYSTEM' S SystemLiteral
4283 *
4284 * See the NOTE on xmlParseExternalID().
4285 */
4286
4287void
4288xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004289 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004290 xmlChar *Pubid;
4291 xmlChar *Systemid;
4292
Daniel Veillarda07050d2003-10-19 14:46:32 +00004293 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004294 xmlParserInputPtr input = ctxt->input;
4295 SHRINK;
4296 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004297 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004298 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4299 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004300 return;
4301 }
4302 SKIP_BLANKS;
4303
Daniel Veillard76d66f42001-05-16 21:05:17 +00004304 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004305 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004306 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004307 return;
4308 }
William M. Brack76e95df2003-10-18 16:20:14 +00004309 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004311 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004312 return;
4313 }
4314 SKIP_BLANKS;
4315
4316 /*
4317 * Parse the IDs.
4318 */
4319 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4320 SKIP_BLANKS;
4321
4322 if (RAW == '>') {
4323 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004324 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4325 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004326 }
4327 NEXT;
4328 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4329 (ctxt->sax->notationDecl != NULL))
4330 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4331 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004332 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004333 }
Owen Taylor3473f882001-02-23 17:55:21 +00004334 if (Systemid != NULL) xmlFree(Systemid);
4335 if (Pubid != NULL) xmlFree(Pubid);
4336 }
4337}
4338
4339/**
4340 * xmlParseEntityDecl:
4341 * @ctxt: an XML parser context
4342 *
4343 * parse <!ENTITY declarations
4344 *
4345 * [70] EntityDecl ::= GEDecl | PEDecl
4346 *
4347 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4348 *
4349 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4350 *
4351 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4352 *
4353 * [74] PEDef ::= EntityValue | ExternalID
4354 *
4355 * [76] NDataDecl ::= S 'NDATA' S Name
4356 *
4357 * [ VC: Notation Declared ]
4358 * The Name must match the declared name of a notation.
4359 */
4360
4361void
4362xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004363 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 xmlChar *value = NULL;
4365 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004366 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004367 int isParameter = 0;
4368 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004369 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004370
Daniel Veillard4c778d82005-01-23 17:37:44 +00004371 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004372 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004374 SHRINK;
4375 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004376 skipped = SKIP_BLANKS;
4377 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004378 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4379 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004380 }
Owen Taylor3473f882001-02-23 17:55:21 +00004381
4382 if (RAW == '%') {
4383 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004384 skipped = SKIP_BLANKS;
4385 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4387 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004388 }
Owen Taylor3473f882001-02-23 17:55:21 +00004389 isParameter = 1;
4390 }
4391
Daniel Veillard76d66f42001-05-16 21:05:17 +00004392 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004393 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004394 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4395 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004396 return;
4397 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004398 skipped = SKIP_BLANKS;
4399 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004400 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4401 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004402 }
Owen Taylor3473f882001-02-23 17:55:21 +00004403
Daniel Veillardf5582f12002-06-11 10:08:16 +00004404 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 /*
4406 * handle the various case of definitions...
4407 */
4408 if (isParameter) {
4409 if ((RAW == '"') || (RAW == '\'')) {
4410 value = xmlParseEntityValue(ctxt, &orig);
4411 if (value) {
4412 if ((ctxt->sax != NULL) &&
4413 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4414 ctxt->sax->entityDecl(ctxt->userData, name,
4415 XML_INTERNAL_PARAMETER_ENTITY,
4416 NULL, NULL, value);
4417 }
4418 } else {
4419 URI = xmlParseExternalID(ctxt, &literal, 1);
4420 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004421 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 }
4423 if (URI) {
4424 xmlURIPtr uri;
4425
4426 uri = xmlParseURI((const char *) URI);
4427 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004428 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4429 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004430 /*
4431 * This really ought to be a well formedness error
4432 * but the XML Core WG decided otherwise c.f. issue
4433 * E26 of the XML erratas.
4434 */
Owen Taylor3473f882001-02-23 17:55:21 +00004435 } else {
4436 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004437 /*
4438 * Okay this is foolish to block those but not
4439 * invalid URIs.
4440 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004441 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 } else {
4443 if ((ctxt->sax != NULL) &&
4444 (!ctxt->disableSAX) &&
4445 (ctxt->sax->entityDecl != NULL))
4446 ctxt->sax->entityDecl(ctxt->userData, name,
4447 XML_EXTERNAL_PARAMETER_ENTITY,
4448 literal, URI, NULL);
4449 }
4450 xmlFreeURI(uri);
4451 }
4452 }
4453 }
4454 } else {
4455 if ((RAW == '"') || (RAW == '\'')) {
4456 value = xmlParseEntityValue(ctxt, &orig);
4457 if ((ctxt->sax != NULL) &&
4458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4459 ctxt->sax->entityDecl(ctxt->userData, name,
4460 XML_INTERNAL_GENERAL_ENTITY,
4461 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004462 /*
4463 * For expat compatibility in SAX mode.
4464 */
4465 if ((ctxt->myDoc == NULL) ||
4466 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4467 if (ctxt->myDoc == NULL) {
4468 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4469 }
4470 if (ctxt->myDoc->intSubset == NULL)
4471 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4472 BAD_CAST "fake", NULL, NULL);
4473
Daniel Veillard1af9a412003-08-20 22:54:39 +00004474 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4475 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004476 }
Owen Taylor3473f882001-02-23 17:55:21 +00004477 } else {
4478 URI = xmlParseExternalID(ctxt, &literal, 1);
4479 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004480 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004481 }
4482 if (URI) {
4483 xmlURIPtr uri;
4484
4485 uri = xmlParseURI((const char *)URI);
4486 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004487 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4488 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004489 /*
4490 * This really ought to be a well formedness error
4491 * but the XML Core WG decided otherwise c.f. issue
4492 * E26 of the XML erratas.
4493 */
Owen Taylor3473f882001-02-23 17:55:21 +00004494 } else {
4495 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004496 /*
4497 * Okay this is foolish to block those but not
4498 * invalid URIs.
4499 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004500 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004501 }
4502 xmlFreeURI(uri);
4503 }
4504 }
William M. Brack76e95df2003-10-18 16:20:14 +00004505 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4507 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004508 }
4509 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004510 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004511 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004512 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004513 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4514 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004515 }
4516 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004517 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004518 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4519 (ctxt->sax->unparsedEntityDecl != NULL))
4520 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4521 literal, URI, ndata);
4522 } else {
4523 if ((ctxt->sax != NULL) &&
4524 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4525 ctxt->sax->entityDecl(ctxt->userData, name,
4526 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4527 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004528 /*
4529 * For expat compatibility in SAX mode.
4530 * assuming the entity repalcement was asked for
4531 */
4532 if ((ctxt->replaceEntities != 0) &&
4533 ((ctxt->myDoc == NULL) ||
4534 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4535 if (ctxt->myDoc == NULL) {
4536 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4537 }
4538
4539 if (ctxt->myDoc->intSubset == NULL)
4540 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4541 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004542 xmlSAX2EntityDecl(ctxt, name,
4543 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4544 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004545 }
Owen Taylor3473f882001-02-23 17:55:21 +00004546 }
4547 }
4548 }
4549 SKIP_BLANKS;
4550 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004551 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004552 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004553 } else {
4554 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004555 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4556 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004557 }
4558 NEXT;
4559 }
4560 if (orig != NULL) {
4561 /*
4562 * Ugly mechanism to save the raw entity value.
4563 */
4564 xmlEntityPtr cur = NULL;
4565
4566 if (isParameter) {
4567 if ((ctxt->sax != NULL) &&
4568 (ctxt->sax->getParameterEntity != NULL))
4569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4570 } else {
4571 if ((ctxt->sax != NULL) &&
4572 (ctxt->sax->getEntity != NULL))
4573 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004574 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004575 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004576 }
Owen Taylor3473f882001-02-23 17:55:21 +00004577 }
4578 if (cur != NULL) {
4579 if (cur->orig != NULL)
4580 xmlFree(orig);
4581 else
4582 cur->orig = orig;
4583 } else
4584 xmlFree(orig);
4585 }
Owen Taylor3473f882001-02-23 17:55:21 +00004586 if (value != NULL) xmlFree(value);
4587 if (URI != NULL) xmlFree(URI);
4588 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004589 }
4590}
4591
4592/**
4593 * xmlParseDefaultDecl:
4594 * @ctxt: an XML parser context
4595 * @value: Receive a possible fixed default value for the attribute
4596 *
4597 * Parse an attribute default declaration
4598 *
4599 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4600 *
4601 * [ VC: Required Attribute ]
4602 * if the default declaration is the keyword #REQUIRED, then the
4603 * attribute must be specified for all elements of the type in the
4604 * attribute-list declaration.
4605 *
4606 * [ VC: Attribute Default Legal ]
4607 * The declared default value must meet the lexical constraints of
4608 * the declared attribute type c.f. xmlValidateAttributeDecl()
4609 *
4610 * [ VC: Fixed Attribute Default ]
4611 * if an attribute has a default value declared with the #FIXED
4612 * keyword, instances of that attribute must match the default value.
4613 *
4614 * [ WFC: No < in Attribute Values ]
4615 * handled in xmlParseAttValue()
4616 *
4617 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4618 * or XML_ATTRIBUTE_FIXED.
4619 */
4620
4621int
4622xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4623 int val;
4624 xmlChar *ret;
4625
4626 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004627 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP(9);
4629 return(XML_ATTRIBUTE_REQUIRED);
4630 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004631 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004632 SKIP(8);
4633 return(XML_ATTRIBUTE_IMPLIED);
4634 }
4635 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004636 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004637 SKIP(6);
4638 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004639 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4641 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 SKIP_BLANKS;
4644 }
4645 ret = xmlParseAttValue(ctxt);
4646 ctxt->instate = XML_PARSER_DTD;
4647 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004648 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004649 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004650 } else
4651 *value = ret;
4652 return(val);
4653}
4654
4655/**
4656 * xmlParseNotationType:
4657 * @ctxt: an XML parser context
4658 *
4659 * parse an Notation attribute type.
4660 *
4661 * Note: the leading 'NOTATION' S part has already being parsed...
4662 *
4663 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4664 *
4665 * [ VC: Notation Attributes ]
4666 * Values of this type must match one of the notation names included
4667 * in the declaration; all notation names in the declaration must be declared.
4668 *
4669 * Returns: the notation attribute tree built while parsing
4670 */
4671
4672xmlEnumerationPtr
4673xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004674 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004675 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4676
4677 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004678 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004679 return(NULL);
4680 }
4681 SHRINK;
4682 do {
4683 NEXT;
4684 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004685 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004686 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004687 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4688 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004689 return(ret);
4690 }
4691 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004692 if (cur == NULL) return(ret);
4693 if (last == NULL) ret = last = cur;
4694 else {
4695 last->next = cur;
4696 last = cur;
4697 }
4698 SKIP_BLANKS;
4699 } while (RAW == '|');
4700 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004702 if ((last != NULL) && (last != ret))
4703 xmlFreeEnumeration(last);
4704 return(ret);
4705 }
4706 NEXT;
4707 return(ret);
4708}
4709
4710/**
4711 * xmlParseEnumerationType:
4712 * @ctxt: an XML parser context
4713 *
4714 * parse an Enumeration attribute type.
4715 *
4716 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4717 *
4718 * [ VC: Enumeration ]
4719 * Values of this type must match one of the Nmtoken tokens in
4720 * the declaration
4721 *
4722 * Returns: the enumeration attribute tree built while parsing
4723 */
4724
4725xmlEnumerationPtr
4726xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4727 xmlChar *name;
4728 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4729
4730 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004731 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004732 return(NULL);
4733 }
4734 SHRINK;
4735 do {
4736 NEXT;
4737 SKIP_BLANKS;
4738 name = xmlParseNmtoken(ctxt);
4739 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004740 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004741 return(ret);
4742 }
4743 cur = xmlCreateEnumeration(name);
4744 xmlFree(name);
4745 if (cur == NULL) return(ret);
4746 if (last == NULL) ret = last = cur;
4747 else {
4748 last->next = cur;
4749 last = cur;
4750 }
4751 SKIP_BLANKS;
4752 } while (RAW == '|');
4753 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004754 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004755 return(ret);
4756 }
4757 NEXT;
4758 return(ret);
4759}
4760
4761/**
4762 * xmlParseEnumeratedType:
4763 * @ctxt: an XML parser context
4764 * @tree: the enumeration tree built while parsing
4765 *
4766 * parse an Enumerated attribute type.
4767 *
4768 * [57] EnumeratedType ::= NotationType | Enumeration
4769 *
4770 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4771 *
4772 *
4773 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4774 */
4775
4776int
4777xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004778 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004779 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004780 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4782 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004783 return(0);
4784 }
4785 SKIP_BLANKS;
4786 *tree = xmlParseNotationType(ctxt);
4787 if (*tree == NULL) return(0);
4788 return(XML_ATTRIBUTE_NOTATION);
4789 }
4790 *tree = xmlParseEnumerationType(ctxt);
4791 if (*tree == NULL) return(0);
4792 return(XML_ATTRIBUTE_ENUMERATION);
4793}
4794
4795/**
4796 * xmlParseAttributeType:
4797 * @ctxt: an XML parser context
4798 * @tree: the enumeration tree built while parsing
4799 *
4800 * parse the Attribute list def for an element
4801 *
4802 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4803 *
4804 * [55] StringType ::= 'CDATA'
4805 *
4806 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4807 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4808 *
4809 * Validity constraints for attribute values syntax are checked in
4810 * xmlValidateAttributeValue()
4811 *
4812 * [ VC: ID ]
4813 * Values of type ID must match the Name production. A name must not
4814 * appear more than once in an XML document as a value of this type;
4815 * i.e., ID values must uniquely identify the elements which bear them.
4816 *
4817 * [ VC: One ID per Element Type ]
4818 * No element type may have more than one ID attribute specified.
4819 *
4820 * [ VC: ID Attribute Default ]
4821 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4822 *
4823 * [ VC: IDREF ]
4824 * Values of type IDREF must match the Name production, and values
4825 * of type IDREFS must match Names; each IDREF Name must match the value
4826 * of an ID attribute on some element in the XML document; i.e. IDREF
4827 * values must match the value of some ID attribute.
4828 *
4829 * [ VC: Entity Name ]
4830 * Values of type ENTITY must match the Name production, values
4831 * of type ENTITIES must match Names; each Entity Name must match the
4832 * name of an unparsed entity declared in the DTD.
4833 *
4834 * [ VC: Name Token ]
4835 * Values of type NMTOKEN must match the Nmtoken production; values
4836 * of type NMTOKENS must match Nmtokens.
4837 *
4838 * Returns the attribute type
4839 */
4840int
4841xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4842 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004843 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 SKIP(5);
4845 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004846 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004847 SKIP(6);
4848 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004849 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004850 SKIP(5);
4851 return(XML_ATTRIBUTE_IDREF);
4852 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4853 SKIP(2);
4854 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004855 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP(6);
4857 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004858 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004859 SKIP(8);
4860 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004861 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004862 SKIP(8);
4863 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004864 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004865 SKIP(7);
4866 return(XML_ATTRIBUTE_NMTOKEN);
4867 }
4868 return(xmlParseEnumeratedType(ctxt, tree));
4869}
4870
4871/**
4872 * xmlParseAttributeListDecl:
4873 * @ctxt: an XML parser context
4874 *
4875 * : parse the Attribute list def for an element
4876 *
4877 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4878 *
4879 * [53] AttDef ::= S Name S AttType S DefaultDecl
4880 *
4881 */
4882void
4883xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004884 const xmlChar *elemName;
4885 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 xmlEnumerationPtr tree;
4887
Daniel Veillarda07050d2003-10-19 14:46:32 +00004888 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004889 xmlParserInputPtr input = ctxt->input;
4890
4891 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004892 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004893 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004894 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004895 }
4896 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004897 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004898 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4900 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004901 return;
4902 }
4903 SKIP_BLANKS;
4904 GROW;
4905 while (RAW != '>') {
4906 const xmlChar *check = CUR_PTR;
4907 int type;
4908 int def;
4909 xmlChar *defaultValue = NULL;
4910
4911 GROW;
4912 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004913 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4916 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 break;
4918 }
4919 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004920 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004921 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004922 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004923 break;
4924 }
4925 SKIP_BLANKS;
4926
4927 type = xmlParseAttributeType(ctxt, &tree);
4928 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004929 break;
4930 }
4931
4932 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004933 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004934 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4935 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004936 if (tree != NULL)
4937 xmlFreeEnumeration(tree);
4938 break;
4939 }
4940 SKIP_BLANKS;
4941
4942 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4943 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004944 if (defaultValue != NULL)
4945 xmlFree(defaultValue);
4946 if (tree != NULL)
4947 xmlFreeEnumeration(tree);
4948 break;
4949 }
4950
4951 GROW;
4952 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004953 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004954 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004955 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (defaultValue != NULL)
4957 xmlFree(defaultValue);
4958 if (tree != NULL)
4959 xmlFreeEnumeration(tree);
4960 break;
4961 }
4962 SKIP_BLANKS;
4963 }
4964 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004965 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4966 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 if (defaultValue != NULL)
4968 xmlFree(defaultValue);
4969 if (tree != NULL)
4970 xmlFreeEnumeration(tree);
4971 break;
4972 }
4973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4974 (ctxt->sax->attributeDecl != NULL))
4975 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4976 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004977 else if (tree != NULL)
4978 xmlFreeEnumeration(tree);
4979
4980 if ((ctxt->sax2) && (defaultValue != NULL) &&
4981 (def != XML_ATTRIBUTE_IMPLIED) &&
4982 (def != XML_ATTRIBUTE_REQUIRED)) {
4983 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4984 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004985 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4986 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4987 }
Owen Taylor3473f882001-02-23 17:55:21 +00004988 if (defaultValue != NULL)
4989 xmlFree(defaultValue);
4990 GROW;
4991 }
4992 if (RAW == '>') {
4993 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004994 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4995 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004996 }
4997 NEXT;
4998 }
Owen Taylor3473f882001-02-23 17:55:21 +00004999 }
5000}
5001
5002/**
5003 * xmlParseElementMixedContentDecl:
5004 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005005 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005006 *
5007 * parse the declaration for a Mixed Element content
5008 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5009 *
5010 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5011 * '(' S? '#PCDATA' S? ')'
5012 *
5013 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5014 *
5015 * [ VC: No Duplicate Types ]
5016 * The same name must not appear more than once in a single
5017 * mixed-content declaration.
5018 *
5019 * returns: the list of the xmlElementContentPtr describing the element choices
5020 */
5021xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005022xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005023 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005024 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005025
5026 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005027 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005028 SKIP(7);
5029 SKIP_BLANKS;
5030 SHRINK;
5031 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005032 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005033 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5034"Element content declaration doesn't start and stop in the same entity\n",
5035 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005036 }
Owen Taylor3473f882001-02-23 17:55:21 +00005037 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005038 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005039 if (RAW == '*') {
5040 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5041 NEXT;
5042 }
5043 return(ret);
5044 }
5045 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005046 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005047 if (ret == NULL) return(NULL);
5048 }
5049 while (RAW == '|') {
5050 NEXT;
5051 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005052 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005053 if (ret == NULL) return(NULL);
5054 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005055 if (cur != NULL)
5056 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005057 cur = ret;
5058 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005059 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005060 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005061 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005062 if (n->c1 != NULL)
5063 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005065 if (n != NULL)
5066 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 }
5069 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005070 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005071 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005072 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005073 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005074 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005075 return(NULL);
5076 }
5077 SKIP_BLANKS;
5078 GROW;
5079 }
5080 if ((RAW == ')') && (NXT(1) == '*')) {
5081 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005082 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005083 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005084 if (cur->c2 != NULL)
5085 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005086 }
5087 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005088 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005089 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5090"Element content declaration doesn't start and stop in the same entity\n",
5091 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005092 }
Owen Taylor3473f882001-02-23 17:55:21 +00005093 SKIP(2);
5094 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005095 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005096 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 return(NULL);
5098 }
5099
5100 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005101 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
5103 return(ret);
5104}
5105
5106/**
5107 * xmlParseElementChildrenContentDecl:
5108 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005109 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005110 *
5111 * parse the declaration for a Mixed Element content
5112 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5113 *
5114 *
5115 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5116 *
5117 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5118 *
5119 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5120 *
5121 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5122 *
5123 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5124 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005125 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005126 * opening or closing parentheses in a choice, seq, or Mixed
5127 * construct is contained in the replacement text for a parameter
5128 * entity, both must be contained in the same replacement text. For
5129 * interoperability, if a parameter-entity reference appears in a
5130 * choice, seq, or Mixed construct, its replacement text should not
5131 * be empty, and neither the first nor last non-blank character of
5132 * the replacement text should be a connector (| or ,).
5133 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005134 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005135 * hierarchy.
5136 */
5137xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005139 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005140 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005141 xmlChar type = 0;
5142
5143 SKIP_BLANKS;
5144 GROW;
5145 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005146 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005147
Owen Taylor3473f882001-02-23 17:55:21 +00005148 /* Recurse on first child */
5149 NEXT;
5150 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005151 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005152 SKIP_BLANKS;
5153 GROW;
5154 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005155 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005157 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005158 return(NULL);
5159 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005160 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005161 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005162 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005163 return(NULL);
5164 }
Owen Taylor3473f882001-02-23 17:55:21 +00005165 GROW;
5166 if (RAW == '?') {
5167 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5168 NEXT;
5169 } else if (RAW == '*') {
5170 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5171 NEXT;
5172 } else if (RAW == '+') {
5173 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5174 NEXT;
5175 } else {
5176 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178 GROW;
5179 }
5180 SKIP_BLANKS;
5181 SHRINK;
5182 while (RAW != ')') {
5183 /*
5184 * Each loop we parse one separator and one element.
5185 */
5186 if (RAW == ',') {
5187 if (type == 0) type = CUR;
5188
5189 /*
5190 * Detect "Name | Name , Name" error
5191 */
5192 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005193 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005194 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005195 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005196 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005197 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005198 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005199 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 return(NULL);
5201 }
5202 NEXT;
5203
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005204 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005205 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005206 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005207 xmlFreeDocElementContent(ctxt->myDoc, last);
5208 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 return(NULL);
5210 }
5211 if (last == NULL) {
5212 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005213 if (ret != NULL)
5214 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005215 ret = cur = op;
5216 } else {
5217 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005218 if (op != NULL)
5219 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005220 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005221 if (last != NULL)
5222 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005223 cur =op;
5224 last = NULL;
5225 }
5226 } else if (RAW == '|') {
5227 if (type == 0) type = CUR;
5228
5229 /*
5230 * Detect "Name , Name | Name" error
5231 */
5232 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005233 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005234 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005235 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005236 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005237 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005238 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005239 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 return(NULL);
5241 }
5242 NEXT;
5243
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005246 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005247 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005248 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005249 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 return(NULL);
5251 }
5252 if (last == NULL) {
5253 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005254 if (ret != NULL)
5255 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005256 ret = cur = op;
5257 } else {
5258 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005259 if (op != NULL)
5260 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005262 if (last != NULL)
5263 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005264 cur =op;
5265 last = NULL;
5266 }
5267 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005268 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005269 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 return(NULL);
5272 }
5273 GROW;
5274 SKIP_BLANKS;
5275 GROW;
5276 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005277 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005278 /* Recurse on second child */
5279 NEXT;
5280 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005281 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 SKIP_BLANKS;
5283 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005284 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005285 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005286 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005287 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005288 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005289 return(NULL);
5290 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005291 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 if (RAW == '?') {
5293 last->ocur = XML_ELEMENT_CONTENT_OPT;
5294 NEXT;
5295 } else if (RAW == '*') {
5296 last->ocur = XML_ELEMENT_CONTENT_MULT;
5297 NEXT;
5298 } else if (RAW == '+') {
5299 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5300 NEXT;
5301 } else {
5302 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5303 }
5304 }
5305 SKIP_BLANKS;
5306 GROW;
5307 }
5308 if ((cur != NULL) && (last != NULL)) {
5309 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005310 if (last != NULL)
5311 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005312 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005313 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005314 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5315"Element content declaration doesn't start and stop in the same entity\n",
5316 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005317 }
Owen Taylor3473f882001-02-23 17:55:21 +00005318 NEXT;
5319 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005320 if (ret != NULL) {
5321 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5322 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5323 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5324 else
5325 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5326 }
Owen Taylor3473f882001-02-23 17:55:21 +00005327 NEXT;
5328 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005329 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005330 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005331 cur = ret;
5332 /*
5333 * Some normalization:
5334 * (a | b* | c?)* == (a | b | c)*
5335 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005336 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005337 if ((cur->c1 != NULL) &&
5338 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5339 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5340 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5341 if ((cur->c2 != NULL) &&
5342 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5343 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5344 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5345 cur = cur->c2;
5346 }
5347 }
Owen Taylor3473f882001-02-23 17:55:21 +00005348 NEXT;
5349 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005350 if (ret != NULL) {
5351 int found = 0;
5352
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005353 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5354 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5355 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005356 else
5357 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005358 /*
5359 * Some normalization:
5360 * (a | b*)+ == (a | b)*
5361 * (a | b?)+ == (a | b)*
5362 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005363 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005364 if ((cur->c1 != NULL) &&
5365 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5366 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5367 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5368 found = 1;
5369 }
5370 if ((cur->c2 != NULL) &&
5371 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5372 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5373 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5374 found = 1;
5375 }
5376 cur = cur->c2;
5377 }
5378 if (found)
5379 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5380 }
Owen Taylor3473f882001-02-23 17:55:21 +00005381 NEXT;
5382 }
5383 return(ret);
5384}
5385
5386/**
5387 * xmlParseElementContentDecl:
5388 * @ctxt: an XML parser context
5389 * @name: the name of the element being defined.
5390 * @result: the Element Content pointer will be stored here if any
5391 *
5392 * parse the declaration for an Element content either Mixed or Children,
5393 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5394 *
5395 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5396 *
5397 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5398 */
5399
5400int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005401xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005402 xmlElementContentPtr *result) {
5403
5404 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005405 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005406 int res;
5407
5408 *result = NULL;
5409
5410 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005411 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005412 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 return(-1);
5414 }
5415 NEXT;
5416 GROW;
5417 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005418 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 res = XML_ELEMENT_TYPE_MIXED;
5421 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005422 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005423 res = XML_ELEMENT_TYPE_ELEMENT;
5424 }
Owen Taylor3473f882001-02-23 17:55:21 +00005425 SKIP_BLANKS;
5426 *result = tree;
5427 return(res);
5428}
5429
5430/**
5431 * xmlParseElementDecl:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse an Element declaration.
5435 *
5436 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5437 *
5438 * [ VC: Unique Element Type Declaration ]
5439 * No element type may be declared more than once
5440 *
5441 * Returns the type of the element, or -1 in case of error
5442 */
5443int
5444xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005445 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005446 int ret = -1;
5447 xmlElementContentPtr content = NULL;
5448
Daniel Veillard4c778d82005-01-23 17:37:44 +00005449 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005450 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005451 xmlParserInputPtr input = ctxt->input;
5452
5453 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005457 }
5458 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005459 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5462 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005463 return(-1);
5464 }
5465 while ((RAW == 0) && (ctxt->inputNr > 1))
5466 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005467 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005468 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5469 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005470 }
5471 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005472 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005473 SKIP(5);
5474 /*
5475 * Element must always be empty.
5476 */
5477 ret = XML_ELEMENT_TYPE_EMPTY;
5478 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5479 (NXT(2) == 'Y')) {
5480 SKIP(3);
5481 /*
5482 * Element is a generic container.
5483 */
5484 ret = XML_ELEMENT_TYPE_ANY;
5485 } else if (RAW == '(') {
5486 ret = xmlParseElementContentDecl(ctxt, name, &content);
5487 } else {
5488 /*
5489 * [ WFC: PEs in Internal Subset ] error handling.
5490 */
5491 if ((RAW == '%') && (ctxt->external == 0) &&
5492 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005494 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005495 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005496 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005497 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5498 }
Owen Taylor3473f882001-02-23 17:55:21 +00005499 return(-1);
5500 }
5501
5502 SKIP_BLANKS;
5503 /*
5504 * Pop-up of finished entities.
5505 */
5506 while ((RAW == 0) && (ctxt->inputNr > 1))
5507 xmlPopInput(ctxt);
5508 SKIP_BLANKS;
5509
5510 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005511 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005512 if (content != NULL) {
5513 xmlFreeDocElementContent(ctxt->myDoc, content);
5514 }
Owen Taylor3473f882001-02-23 17:55:21 +00005515 } else {
5516 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005517 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5518 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005519 }
5520
5521 NEXT;
5522 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005523 (ctxt->sax->elementDecl != NULL)) {
5524 if (content != NULL)
5525 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005526 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5527 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005528 if ((content != NULL) && (content->parent == NULL)) {
5529 /*
5530 * this is a trick: if xmlAddElementDecl is called,
5531 * instead of copying the full tree it is plugged directly
5532 * if called from the parser. Avoid duplicating the
5533 * interfaces or change the API/ABI
5534 */
5535 xmlFreeDocElementContent(ctxt->myDoc, content);
5536 }
5537 } else if (content != NULL) {
5538 xmlFreeDocElementContent(ctxt->myDoc, content);
5539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
Owen Taylor3473f882001-02-23 17:55:21 +00005541 }
5542 return(ret);
5543}
5544
5545/**
Owen Taylor3473f882001-02-23 17:55:21 +00005546 * xmlParseConditionalSections
5547 * @ctxt: an XML parser context
5548 *
5549 * [61] conditionalSect ::= includeSect | ignoreSect
5550 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5551 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5552 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5553 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5554 */
5555
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005556static void
Owen Taylor3473f882001-02-23 17:55:21 +00005557xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5558 SKIP(3);
5559 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005560 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005561 SKIP(7);
5562 SKIP_BLANKS;
5563 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005564 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005565 } else {
5566 NEXT;
5567 }
5568 if (xmlParserDebugEntities) {
5569 if ((ctxt->input != NULL) && (ctxt->input->filename))
5570 xmlGenericError(xmlGenericErrorContext,
5571 "%s(%d): ", ctxt->input->filename,
5572 ctxt->input->line);
5573 xmlGenericError(xmlGenericErrorContext,
5574 "Entering INCLUDE Conditional Section\n");
5575 }
5576
5577 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5578 (NXT(2) != '>'))) {
5579 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005580 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005581
5582 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5583 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005584 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005585 NEXT;
5586 } else if (RAW == '%') {
5587 xmlParsePEReference(ctxt);
5588 } else
5589 xmlParseMarkupDecl(ctxt);
5590
5591 /*
5592 * Pop-up of finished entities.
5593 */
5594 while ((RAW == 0) && (ctxt->inputNr > 1))
5595 xmlPopInput(ctxt);
5596
Daniel Veillardfdc91562002-07-01 21:52:03 +00005597 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005598 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005599 break;
5600 }
5601 }
5602 if (xmlParserDebugEntities) {
5603 if ((ctxt->input != NULL) && (ctxt->input->filename))
5604 xmlGenericError(xmlGenericErrorContext,
5605 "%s(%d): ", ctxt->input->filename,
5606 ctxt->input->line);
5607 xmlGenericError(xmlGenericErrorContext,
5608 "Leaving INCLUDE Conditional Section\n");
5609 }
5610
Daniel Veillarda07050d2003-10-19 14:46:32 +00005611 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005612 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005613 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005614 int depth = 0;
5615
5616 SKIP(6);
5617 SKIP_BLANKS;
5618 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005619 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005620 } else {
5621 NEXT;
5622 }
5623 if (xmlParserDebugEntities) {
5624 if ((ctxt->input != NULL) && (ctxt->input->filename))
5625 xmlGenericError(xmlGenericErrorContext,
5626 "%s(%d): ", ctxt->input->filename,
5627 ctxt->input->line);
5628 xmlGenericError(xmlGenericErrorContext,
5629 "Entering IGNORE Conditional Section\n");
5630 }
5631
5632 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005633 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005634 * But disable SAX event generating DTD building in the meantime
5635 */
5636 state = ctxt->disableSAX;
5637 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005638 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005639 ctxt->instate = XML_PARSER_IGNORE;
5640
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005641 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005642 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5643 depth++;
5644 SKIP(3);
5645 continue;
5646 }
5647 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5648 if (--depth >= 0) SKIP(3);
5649 continue;
5650 }
5651 NEXT;
5652 continue;
5653 }
5654
5655 ctxt->disableSAX = state;
5656 ctxt->instate = instate;
5657
5658 if (xmlParserDebugEntities) {
5659 if ((ctxt->input != NULL) && (ctxt->input->filename))
5660 xmlGenericError(xmlGenericErrorContext,
5661 "%s(%d): ", ctxt->input->filename,
5662 ctxt->input->line);
5663 xmlGenericError(xmlGenericErrorContext,
5664 "Leaving IGNORE Conditional Section\n");
5665 }
5666
5667 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005668 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005669 }
5670
5671 if (RAW == 0)
5672 SHRINK;
5673
5674 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005675 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005676 } else {
5677 SKIP(3);
5678 }
5679}
5680
5681/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005682 * xmlParseMarkupDecl:
5683 * @ctxt: an XML parser context
5684 *
5685 * parse Markup declarations
5686 *
5687 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5688 * NotationDecl | PI | Comment
5689 *
5690 * [ VC: Proper Declaration/PE Nesting ]
5691 * Parameter-entity replacement text must be properly nested with
5692 * markup declarations. That is to say, if either the first character
5693 * or the last character of a markup declaration (markupdecl above) is
5694 * contained in the replacement text for a parameter-entity reference,
5695 * both must be contained in the same replacement text.
5696 *
5697 * [ WFC: PEs in Internal Subset ]
5698 * In the internal DTD subset, parameter-entity references can occur
5699 * only where markup declarations can occur, not within markup declarations.
5700 * (This does not apply to references that occur in external parameter
5701 * entities or to the external subset.)
5702 */
5703void
5704xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5705 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005706 if (CUR == '<') {
5707 if (NXT(1) == '!') {
5708 switch (NXT(2)) {
5709 case 'E':
5710 if (NXT(3) == 'L')
5711 xmlParseElementDecl(ctxt);
5712 else if (NXT(3) == 'N')
5713 xmlParseEntityDecl(ctxt);
5714 break;
5715 case 'A':
5716 xmlParseAttributeListDecl(ctxt);
5717 break;
5718 case 'N':
5719 xmlParseNotationDecl(ctxt);
5720 break;
5721 case '-':
5722 xmlParseComment(ctxt);
5723 break;
5724 default:
5725 /* there is an error but it will be detected later */
5726 break;
5727 }
5728 } else if (NXT(1) == '?') {
5729 xmlParsePI(ctxt);
5730 }
5731 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005732 /*
5733 * This is only for internal subset. On external entities,
5734 * the replacement is done before parsing stage
5735 */
5736 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5737 xmlParsePEReference(ctxt);
5738
5739 /*
5740 * Conditional sections are allowed from entities included
5741 * by PE References in the internal subset.
5742 */
5743 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5744 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5745 xmlParseConditionalSections(ctxt);
5746 }
5747 }
5748
5749 ctxt->instate = XML_PARSER_DTD;
5750}
5751
5752/**
5753 * xmlParseTextDecl:
5754 * @ctxt: an XML parser context
5755 *
5756 * parse an XML declaration header for external entities
5757 *
5758 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5759 *
5760 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5761 */
5762
5763void
5764xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5765 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005766 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005767
5768 /*
5769 * We know that '<?xml' is here.
5770 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005771 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005772 SKIP(5);
5773 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005774 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005775 return;
5776 }
5777
William M. Brack76e95df2003-10-18 16:20:14 +00005778 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005779 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5780 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005781 }
5782 SKIP_BLANKS;
5783
5784 /*
5785 * We may have the VersionInfo here.
5786 */
5787 version = xmlParseVersionInfo(ctxt);
5788 if (version == NULL)
5789 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005790 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005791 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005792 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5793 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005794 }
5795 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005796 ctxt->input->version = version;
5797
5798 /*
5799 * We must have the encoding declaration
5800 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005801 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005802 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5803 /*
5804 * The XML REC instructs us to stop parsing right here
5805 */
5806 return;
5807 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005808 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5809 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5810 "Missing encoding in text declaration\n");
5811 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005812
5813 SKIP_BLANKS;
5814 if ((RAW == '?') && (NXT(1) == '>')) {
5815 SKIP(2);
5816 } else if (RAW == '>') {
5817 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005818 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005819 NEXT;
5820 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005821 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005822 MOVETO_ENDTAG(CUR_PTR);
5823 NEXT;
5824 }
5825}
5826
5827/**
Owen Taylor3473f882001-02-23 17:55:21 +00005828 * xmlParseExternalSubset:
5829 * @ctxt: an XML parser context
5830 * @ExternalID: the external identifier
5831 * @SystemID: the system identifier (or URL)
5832 *
5833 * parse Markup declarations from an external subset
5834 *
5835 * [30] extSubset ::= textDecl? extSubsetDecl
5836 *
5837 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5838 */
5839void
5840xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5841 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005842 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005844 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005845 xmlParseTextDecl(ctxt);
5846 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5847 /*
5848 * The XML REC instructs us to stop parsing right here
5849 */
5850 ctxt->instate = XML_PARSER_EOF;
5851 return;
5852 }
5853 }
5854 if (ctxt->myDoc == NULL) {
5855 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5856 }
5857 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5858 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5859
5860 ctxt->instate = XML_PARSER_DTD;
5861 ctxt->external = 1;
5862 while (((RAW == '<') && (NXT(1) == '?')) ||
5863 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005864 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005865 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005866 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005867
5868 GROW;
5869 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5870 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005871 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005872 NEXT;
5873 } else if (RAW == '%') {
5874 xmlParsePEReference(ctxt);
5875 } else
5876 xmlParseMarkupDecl(ctxt);
5877
5878 /*
5879 * Pop-up of finished entities.
5880 */
5881 while ((RAW == 0) && (ctxt->inputNr > 1))
5882 xmlPopInput(ctxt);
5883
Daniel Veillardfdc91562002-07-01 21:52:03 +00005884 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005885 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005886 break;
5887 }
5888 }
5889
5890 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005891 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 }
5893
5894}
5895
5896/**
5897 * xmlParseReference:
5898 * @ctxt: an XML parser context
5899 *
5900 * parse and handle entity references in content, depending on the SAX
5901 * interface, this may end-up in a call to character() if this is a
5902 * CharRef, a predefined entity, if there is no reference() callback.
5903 * or if the parser was asked to switch to that mode.
5904 *
5905 * [67] Reference ::= EntityRef | CharRef
5906 */
5907void
5908xmlParseReference(xmlParserCtxtPtr ctxt) {
5909 xmlEntityPtr ent;
5910 xmlChar *val;
5911 if (RAW != '&') return;
5912
5913 if (NXT(1) == '#') {
5914 int i = 0;
5915 xmlChar out[10];
5916 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005917 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005918
5919 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5920 /*
5921 * So we are using non-UTF-8 buffers
5922 * Check that the char fit on 8bits, if not
5923 * generate a CharRef.
5924 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005925 if (value <= 0xFF) {
5926 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005927 out[1] = 0;
5928 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5929 (!ctxt->disableSAX))
5930 ctxt->sax->characters(ctxt->userData, out, 1);
5931 } else {
5932 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005933 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005934 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005935 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005936 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5937 (!ctxt->disableSAX))
5938 ctxt->sax->reference(ctxt->userData, out);
5939 }
5940 } else {
5941 /*
5942 * Just encode the value in UTF-8
5943 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005944 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005945 out[i] = 0;
5946 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5947 (!ctxt->disableSAX))
5948 ctxt->sax->characters(ctxt->userData, out, i);
5949 }
5950 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005951 int was_checked;
5952
Owen Taylor3473f882001-02-23 17:55:21 +00005953 ent = xmlParseEntityRef(ctxt);
5954 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005955 if (!ctxt->wellFormed)
5956 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005957 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if ((ent->name != NULL) &&
5959 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5960 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005961 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005962
5963
5964 /*
5965 * The first reference to the entity trigger a parsing phase
5966 * where the ent->children is filled with the result from
5967 * the parsing.
5968 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005969 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005970 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005971
Owen Taylor3473f882001-02-23 17:55:21 +00005972 value = ent->content;
5973
5974 /*
5975 * Check that this entity is well formed
5976 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005977 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005978 (value[1] == 0) && (value[0] == '<') &&
5979 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5980 /*
5981 * DONE: get definite answer on this !!!
5982 * Lots of entity decls are used to declare a single
5983 * char
5984 * <!ENTITY lt "<">
5985 * Which seems to be valid since
5986 * 2.4: The ampersand character (&) and the left angle
5987 * bracket (<) may appear in their literal form only
5988 * when used ... They are also legal within the literal
5989 * entity value of an internal entity declaration;i
5990 * see "4.3.2 Well-Formed Parsed Entities".
5991 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5992 * Looking at the OASIS test suite and James Clark
5993 * tests, this is broken. However the XML REC uses
5994 * it. Is the XML REC not well-formed ????
5995 * This is a hack to avoid this problem
5996 *
5997 * ANSWER: since lt gt amp .. are already defined,
5998 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005999 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006000 * is lousy but acceptable.
6001 */
6002 list = xmlNewDocText(ctxt->myDoc, value);
6003 if (list != NULL) {
6004 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6005 (ent->children == NULL)) {
6006 ent->children = list;
6007 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006008 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006009 list->parent = (xmlNodePtr) ent;
6010 } else {
6011 xmlFreeNodeList(list);
6012 }
6013 } else if (list != NULL) {
6014 xmlFreeNodeList(list);
6015 }
6016 } else {
6017 /*
6018 * 4.3.2: An internal general parsed entity is well-formed
6019 * if its replacement text matches the production labeled
6020 * content.
6021 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006022
6023 void *user_data;
6024 /*
6025 * This is a bit hackish but this seems the best
6026 * way to make sure both SAX and DOM entity support
6027 * behaves okay.
6028 */
6029 if (ctxt->userData == ctxt)
6030 user_data = NULL;
6031 else
6032 user_data = ctxt->userData;
6033
Owen Taylor3473f882001-02-23 17:55:21 +00006034 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6035 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006036 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6037 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006038 ctxt->depth--;
6039 } else if (ent->etype ==
6040 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6041 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006042 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006043 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006044 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006045 ctxt->depth--;
6046 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006047 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006048 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6049 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006050 }
6051 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006052 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006053 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006054 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006055 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6056 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006057 (ent->children == NULL)) {
6058 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006059 if (ctxt->replaceEntities) {
6060 /*
6061 * Prune it directly in the generated document
6062 * except for single text nodes.
6063 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006064 if (((list->type == XML_TEXT_NODE) &&
6065 (list->next == NULL)) ||
6066 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006067 list->parent = (xmlNodePtr) ent;
6068 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006069 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006070 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006071 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006072 while (list != NULL) {
6073 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006074 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006075 if (list->next == NULL)
6076 ent->last = list;
6077 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006078 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006079 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006080#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006081 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6082 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006083#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006084 }
6085 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006086 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006087 while (list != NULL) {
6088 list->parent = (xmlNodePtr) ent;
6089 if (list->next == NULL)
6090 ent->last = list;
6091 list = list->next;
6092 }
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 } else {
6095 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006096 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006097 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006098 } else if ((ret != XML_ERR_OK) &&
6099 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006100 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 } else if (list != NULL) {
6102 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006103 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 }
6105 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006106 ent->checked = 1;
6107 }
6108
6109 if (ent->children == NULL) {
6110 /*
6111 * Probably running in SAX mode and the callbacks don't
6112 * build the entity content. So unless we already went
6113 * though parsing for first checking go though the entity
6114 * content to generate callbacks associated to the entity
6115 */
6116 if (was_checked == 1) {
6117 void *user_data;
6118 /*
6119 * This is a bit hackish but this seems the best
6120 * way to make sure both SAX and DOM entity support
6121 * behaves okay.
6122 */
6123 if (ctxt->userData == ctxt)
6124 user_data = NULL;
6125 else
6126 user_data = ctxt->userData;
6127
6128 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6129 ctxt->depth++;
6130 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6131 ent->content, user_data, NULL);
6132 ctxt->depth--;
6133 } else if (ent->etype ==
6134 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6135 ctxt->depth++;
6136 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6137 ctxt->sax, user_data, ctxt->depth,
6138 ent->URI, ent->ExternalID, NULL);
6139 ctxt->depth--;
6140 } else {
6141 ret = XML_ERR_ENTITY_PE_INTERNAL;
6142 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6143 "invalid entity type found\n", NULL);
6144 }
6145 if (ret == XML_ERR_ENTITY_LOOP) {
6146 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6147 return;
6148 }
6149 }
6150 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6151 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6152 /*
6153 * Entity reference callback comes second, it's somewhat
6154 * superfluous but a compatibility to historical behaviour
6155 */
6156 ctxt->sax->reference(ctxt->userData, ent->name);
6157 }
6158 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006159 }
6160 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006161 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006162 /*
6163 * Create a node.
6164 */
6165 ctxt->sax->reference(ctxt->userData, ent->name);
6166 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006167 }
6168 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006169 /*
6170 * There is a problem on the handling of _private for entities
6171 * (bug 155816): Should we copy the content of the field from
6172 * the entity (possibly overwriting some value set by the user
6173 * when a copy is created), should we leave it alone, or should
6174 * we try to take care of different situations? The problem
6175 * is exacerbated by the usage of this field by the xmlReader.
6176 * To fix this bug, we look at _private on the created node
6177 * and, if it's NULL, we copy in whatever was in the entity.
6178 * If it's not NULL we leave it alone. This is somewhat of a
6179 * hack - maybe we should have further tests to determine
6180 * what to do.
6181 */
Owen Taylor3473f882001-02-23 17:55:21 +00006182 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6183 /*
6184 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006185 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006186 * In the first occurrence list contains the replacement.
6187 * progressive == 2 means we are operating on the Reader
6188 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006189 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006190 if (((list == NULL) && (ent->owner == 0)) ||
6191 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006192 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006193
6194 /*
6195 * when operating on a reader, the entities definitions
6196 * are always owning the entities subtree.
6197 if (ctxt->parseMode == XML_PARSE_READER)
6198 ent->owner = 1;
6199 */
6200
Daniel Veillard62f313b2001-07-04 19:49:14 +00006201 cur = ent->children;
6202 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006203 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006204 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006205 if (nw->_private == NULL)
6206 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006207 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006208 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006209 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006210 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006211 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006212 if (cur == ent->last) {
6213 /*
6214 * needed to detect some strange empty
6215 * node cases in the reader tests
6216 */
6217 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006218 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006219 (nw->type == XML_ELEMENT_NODE) &&
6220 (nw->children == NULL))
6221 nw->extra = 1;
6222
Daniel Veillard62f313b2001-07-04 19:49:14 +00006223 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006224 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006225 cur = cur->next;
6226 }
Daniel Veillard81273902003-09-30 00:43:48 +00006227#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006228 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006229 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006230#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006231 } else if (list == NULL) {
6232 xmlNodePtr nw = NULL, cur, next, last,
6233 firstChild = NULL;
6234 /*
6235 * Copy the entity child list and make it the new
6236 * entity child list. The goal is to make sure any
6237 * ID or REF referenced will be the one from the
6238 * document content and not the entity copy.
6239 */
6240 cur = ent->children;
6241 ent->children = NULL;
6242 last = ent->last;
6243 ent->last = NULL;
6244 while (cur != NULL) {
6245 next = cur->next;
6246 cur->next = NULL;
6247 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006248 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006249 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006250 if (nw->_private == NULL)
6251 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006252 if (firstChild == NULL){
6253 firstChild = cur;
6254 }
6255 xmlAddChild((xmlNodePtr) ent, nw);
6256 xmlAddChild(ctxt->node, cur);
6257 }
6258 if (cur == last)
6259 break;
6260 cur = next;
6261 }
6262 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006263#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006264 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6265 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006266#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006267 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006268 const xmlChar *nbktext;
6269
Daniel Veillard62f313b2001-07-04 19:49:14 +00006270 /*
6271 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006272 * node with a possible previous text one which
6273 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006274 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006275 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6276 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006277 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006278 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006279 if ((ent->last != ent->children) &&
6280 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006281 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006282 xmlAddChildList(ctxt->node, ent->children);
6283 }
6284
Owen Taylor3473f882001-02-23 17:55:21 +00006285 /*
6286 * This is to avoid a nasty side effect, see
6287 * characters() in SAX.c
6288 */
6289 ctxt->nodemem = 0;
6290 ctxt->nodelen = 0;
6291 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006292 }
6293 }
6294 } else {
6295 val = ent->content;
6296 if (val == NULL) return;
6297 /*
6298 * inline the entity.
6299 */
6300 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6301 (!ctxt->disableSAX))
6302 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6303 }
6304 }
6305}
6306
6307/**
6308 * xmlParseEntityRef:
6309 * @ctxt: an XML parser context
6310 *
6311 * parse ENTITY references declarations
6312 *
6313 * [68] EntityRef ::= '&' Name ';'
6314 *
6315 * [ WFC: Entity Declared ]
6316 * In a document without any DTD, a document with only an internal DTD
6317 * subset which contains no parameter entity references, or a document
6318 * with "standalone='yes'", the Name given in the entity reference
6319 * must match that in an entity declaration, except that well-formed
6320 * documents need not declare any of the following entities: amp, lt,
6321 * gt, apos, quot. The declaration of a parameter entity must precede
6322 * any reference to it. Similarly, the declaration of a general entity
6323 * must precede any reference to it which appears in a default value in an
6324 * attribute-list declaration. Note that if entities are declared in the
6325 * external subset or in external parameter entities, a non-validating
6326 * processor is not obligated to read and process their declarations;
6327 * for such documents, the rule that an entity must be declared is a
6328 * well-formedness constraint only if standalone='yes'.
6329 *
6330 * [ WFC: Parsed Entity ]
6331 * An entity reference must not contain the name of an unparsed entity
6332 *
6333 * Returns the xmlEntityPtr if found, or NULL otherwise.
6334 */
6335xmlEntityPtr
6336xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006337 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006338 xmlEntityPtr ent = NULL;
6339
6340 GROW;
6341
6342 if (RAW == '&') {
6343 NEXT;
6344 name = xmlParseName(ctxt);
6345 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006346 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6347 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006348 } else {
6349 if (RAW == ';') {
6350 NEXT;
6351 /*
6352 * Ask first SAX for entity resolution, otherwise try the
6353 * predefined set.
6354 */
6355 if (ctxt->sax != NULL) {
6356 if (ctxt->sax->getEntity != NULL)
6357 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006358 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006359 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006360 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6361 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006362 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006363 }
Owen Taylor3473f882001-02-23 17:55:21 +00006364 }
6365 /*
6366 * [ WFC: Entity Declared ]
6367 * In a document without any DTD, a document with only an
6368 * internal DTD subset which contains no parameter entity
6369 * references, or a document with "standalone='yes'", the
6370 * Name given in the entity reference must match that in an
6371 * entity declaration, except that well-formed documents
6372 * need not declare any of the following entities: amp, lt,
6373 * gt, apos, quot.
6374 * The declaration of a parameter entity must precede any
6375 * reference to it.
6376 * Similarly, the declaration of a general entity must
6377 * precede any reference to it which appears in a default
6378 * value in an attribute-list declaration. Note that if
6379 * entities are declared in the external subset or in
6380 * external parameter entities, a non-validating processor
6381 * is not obligated to read and process their declarations;
6382 * for such documents, the rule that an entity must be
6383 * declared is a well-formedness constraint only if
6384 * standalone='yes'.
6385 */
6386 if (ent == NULL) {
6387 if ((ctxt->standalone == 1) ||
6388 ((ctxt->hasExternalSubset == 0) &&
6389 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006390 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006391 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006392 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006393 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006394 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006395 if ((ctxt->inSubset == 0) &&
6396 (ctxt->sax != NULL) &&
6397 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006398 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006399 }
Owen Taylor3473f882001-02-23 17:55:21 +00006400 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006401 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006402 }
6403
6404 /*
6405 * [ WFC: Parsed Entity ]
6406 * An entity reference must not contain the name of an
6407 * unparsed entity
6408 */
6409 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006410 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006411 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006412 }
6413
6414 /*
6415 * [ WFC: No External Entity References ]
6416 * Attribute values cannot contain direct or indirect
6417 * entity references to external entities.
6418 */
6419 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6420 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006421 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6422 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006423 }
6424 /*
6425 * [ WFC: No < in Attribute Values ]
6426 * The replacement text of any entity referred to directly or
6427 * indirectly in an attribute value (other than "&lt;") must
6428 * not contain a <.
6429 */
6430 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6431 (ent != NULL) &&
6432 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6433 (ent->content != NULL) &&
6434 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006435 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006436 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006437 }
6438
6439 /*
6440 * Internal check, no parameter entities here ...
6441 */
6442 else {
6443 switch (ent->etype) {
6444 case XML_INTERNAL_PARAMETER_ENTITY:
6445 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006446 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6447 "Attempt to reference the parameter entity '%s'\n",
6448 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006449 break;
6450 default:
6451 break;
6452 }
6453 }
6454
6455 /*
6456 * [ WFC: No Recursion ]
6457 * A parsed entity must not contain a recursive reference
6458 * to itself, either directly or indirectly.
6459 * Done somewhere else
6460 */
6461
6462 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006463 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006464 }
Owen Taylor3473f882001-02-23 17:55:21 +00006465 }
6466 }
6467 return(ent);
6468}
6469
6470/**
6471 * xmlParseStringEntityRef:
6472 * @ctxt: an XML parser context
6473 * @str: a pointer to an index in the string
6474 *
6475 * parse ENTITY references declarations, but this version parses it from
6476 * a string value.
6477 *
6478 * [68] EntityRef ::= '&' Name ';'
6479 *
6480 * [ WFC: Entity Declared ]
6481 * In a document without any DTD, a document with only an internal DTD
6482 * subset which contains no parameter entity references, or a document
6483 * with "standalone='yes'", the Name given in the entity reference
6484 * must match that in an entity declaration, except that well-formed
6485 * documents need not declare any of the following entities: amp, lt,
6486 * gt, apos, quot. The declaration of a parameter entity must precede
6487 * any reference to it. Similarly, the declaration of a general entity
6488 * must precede any reference to it which appears in a default value in an
6489 * attribute-list declaration. Note that if entities are declared in the
6490 * external subset or in external parameter entities, a non-validating
6491 * processor is not obligated to read and process their declarations;
6492 * for such documents, the rule that an entity must be declared is a
6493 * well-formedness constraint only if standalone='yes'.
6494 *
6495 * [ WFC: Parsed Entity ]
6496 * An entity reference must not contain the name of an unparsed entity
6497 *
6498 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6499 * is updated to the current location in the string.
6500 */
6501xmlEntityPtr
6502xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6503 xmlChar *name;
6504 const xmlChar *ptr;
6505 xmlChar cur;
6506 xmlEntityPtr ent = NULL;
6507
6508 if ((str == NULL) || (*str == NULL))
6509 return(NULL);
6510 ptr = *str;
6511 cur = *ptr;
6512 if (cur == '&') {
6513 ptr++;
6514 cur = *ptr;
6515 name = xmlParseStringName(ctxt, &ptr);
6516 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006517 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6518 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006519 } else {
6520 if (*ptr == ';') {
6521 ptr++;
6522 /*
6523 * Ask first SAX for entity resolution, otherwise try the
6524 * predefined set.
6525 */
6526 if (ctxt->sax != NULL) {
6527 if (ctxt->sax->getEntity != NULL)
6528 ent = ctxt->sax->getEntity(ctxt->userData, name);
6529 if (ent == NULL)
6530 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006531 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006532 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006533 }
Owen Taylor3473f882001-02-23 17:55:21 +00006534 }
6535 /*
6536 * [ WFC: Entity Declared ]
6537 * In a document without any DTD, a document with only an
6538 * internal DTD subset which contains no parameter entity
6539 * references, or a document with "standalone='yes'", the
6540 * Name given in the entity reference must match that in an
6541 * entity declaration, except that well-formed documents
6542 * need not declare any of the following entities: amp, lt,
6543 * gt, apos, quot.
6544 * The declaration of a parameter entity must precede any
6545 * reference to it.
6546 * Similarly, the declaration of a general entity must
6547 * precede any reference to it which appears in a default
6548 * value in an attribute-list declaration. Note that if
6549 * entities are declared in the external subset or in
6550 * external parameter entities, a non-validating processor
6551 * is not obligated to read and process their declarations;
6552 * for such documents, the rule that an entity must be
6553 * declared is a well-formedness constraint only if
6554 * standalone='yes'.
6555 */
6556 if (ent == NULL) {
6557 if ((ctxt->standalone == 1) ||
6558 ((ctxt->hasExternalSubset == 0) &&
6559 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006560 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006561 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006562 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006563 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006564 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006565 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006567 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006568 }
6569
6570 /*
6571 * [ WFC: Parsed Entity ]
6572 * An entity reference must not contain the name of an
6573 * unparsed entity
6574 */
6575 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006576 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006577 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006578 }
6579
6580 /*
6581 * [ WFC: No External Entity References ]
6582 * Attribute values cannot contain direct or indirect
6583 * entity references to external entities.
6584 */
6585 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6586 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006587 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006588 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006589 }
6590 /*
6591 * [ WFC: No < in Attribute Values ]
6592 * The replacement text of any entity referred to directly or
6593 * indirectly in an attribute value (other than "&lt;") must
6594 * not contain a <.
6595 */
6596 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6597 (ent != NULL) &&
6598 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6599 (ent->content != NULL) &&
6600 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006601 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6602 "'<' in entity '%s' is not allowed in attributes values\n",
6603 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006604 }
6605
6606 /*
6607 * Internal check, no parameter entities here ...
6608 */
6609 else {
6610 switch (ent->etype) {
6611 case XML_INTERNAL_PARAMETER_ENTITY:
6612 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006613 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6614 "Attempt to reference the parameter entity '%s'\n",
6615 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006616 break;
6617 default:
6618 break;
6619 }
6620 }
6621
6622 /*
6623 * [ WFC: No Recursion ]
6624 * A parsed entity must not contain a recursive reference
6625 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006626 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006627 */
6628
6629 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006630 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006631 }
6632 xmlFree(name);
6633 }
6634 }
6635 *str = ptr;
6636 return(ent);
6637}
6638
6639/**
6640 * xmlParsePEReference:
6641 * @ctxt: an XML parser context
6642 *
6643 * parse PEReference declarations
6644 * The entity content is handled directly by pushing it's content as
6645 * a new input stream.
6646 *
6647 * [69] PEReference ::= '%' Name ';'
6648 *
6649 * [ WFC: No Recursion ]
6650 * A parsed entity must not contain a recursive
6651 * reference to itself, either directly or indirectly.
6652 *
6653 * [ WFC: Entity Declared ]
6654 * In a document without any DTD, a document with only an internal DTD
6655 * subset which contains no parameter entity references, or a document
6656 * with "standalone='yes'", ... ... The declaration of a parameter
6657 * entity must precede any reference to it...
6658 *
6659 * [ VC: Entity Declared ]
6660 * In a document with an external subset or external parameter entities
6661 * with "standalone='no'", ... ... The declaration of a parameter entity
6662 * must precede any reference to it...
6663 *
6664 * [ WFC: In DTD ]
6665 * Parameter-entity references may only appear in the DTD.
6666 * NOTE: misleading but this is handled.
6667 */
6668void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006669xmlParsePEReference(xmlParserCtxtPtr ctxt)
6670{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006671 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006672 xmlEntityPtr entity = NULL;
6673 xmlParserInputPtr input;
6674
6675 if (RAW == '%') {
6676 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006677 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006678 if (name == NULL) {
6679 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6680 "xmlParsePEReference: no name\n");
6681 } else {
6682 if (RAW == ';') {
6683 NEXT;
6684 if ((ctxt->sax != NULL) &&
6685 (ctxt->sax->getParameterEntity != NULL))
6686 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6687 name);
6688 if (entity == NULL) {
6689 /*
6690 * [ WFC: Entity Declared ]
6691 * In a document without any DTD, a document with only an
6692 * internal DTD subset which contains no parameter entity
6693 * references, or a document with "standalone='yes'", ...
6694 * ... The declaration of a parameter entity must precede
6695 * any reference to it...
6696 */
6697 if ((ctxt->standalone == 1) ||
6698 ((ctxt->hasExternalSubset == 0) &&
6699 (ctxt->hasPErefs == 0))) {
6700 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6701 "PEReference: %%%s; not found\n",
6702 name);
6703 } else {
6704 /*
6705 * [ VC: Entity Declared ]
6706 * In a document with an external subset or external
6707 * parameter entities with "standalone='no'", ...
6708 * ... The declaration of a parameter entity must
6709 * precede any reference to it...
6710 */
6711 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6712 "PEReference: %%%s; not found\n",
6713 name, NULL);
6714 ctxt->valid = 0;
6715 }
6716 } else {
6717 /*
6718 * Internal checking in case the entity quest barfed
6719 */
6720 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6721 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6722 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6723 "Internal: %%%s; is not a parameter entity\n",
6724 name, NULL);
6725 } else if (ctxt->input->free != deallocblankswrapper) {
6726 input =
6727 xmlNewBlanksWrapperInputStream(ctxt, entity);
6728 xmlPushInput(ctxt, input);
6729 } else {
6730 /*
6731 * TODO !!!
6732 * handle the extra spaces added before and after
6733 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6734 */
6735 input = xmlNewEntityInputStream(ctxt, entity);
6736 xmlPushInput(ctxt, input);
6737 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006738 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006739 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006740 xmlParseTextDecl(ctxt);
6741 if (ctxt->errNo ==
6742 XML_ERR_UNSUPPORTED_ENCODING) {
6743 /*
6744 * The XML REC instructs us to stop parsing
6745 * right here
6746 */
6747 ctxt->instate = XML_PARSER_EOF;
6748 return;
6749 }
6750 }
6751 }
6752 }
6753 ctxt->hasPErefs = 1;
6754 } else {
6755 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6756 }
6757 }
Owen Taylor3473f882001-02-23 17:55:21 +00006758 }
6759}
6760
6761/**
6762 * xmlParseStringPEReference:
6763 * @ctxt: an XML parser context
6764 * @str: a pointer to an index in the string
6765 *
6766 * parse PEReference declarations
6767 *
6768 * [69] PEReference ::= '%' Name ';'
6769 *
6770 * [ WFC: No Recursion ]
6771 * A parsed entity must not contain a recursive
6772 * reference to itself, either directly or indirectly.
6773 *
6774 * [ WFC: Entity Declared ]
6775 * In a document without any DTD, a document with only an internal DTD
6776 * subset which contains no parameter entity references, or a document
6777 * with "standalone='yes'", ... ... The declaration of a parameter
6778 * entity must precede any reference to it...
6779 *
6780 * [ VC: Entity Declared ]
6781 * In a document with an external subset or external parameter entities
6782 * with "standalone='no'", ... ... The declaration of a parameter entity
6783 * must precede any reference to it...
6784 *
6785 * [ WFC: In DTD ]
6786 * Parameter-entity references may only appear in the DTD.
6787 * NOTE: misleading but this is handled.
6788 *
6789 * Returns the string of the entity content.
6790 * str is updated to the current value of the index
6791 */
6792xmlEntityPtr
6793xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6794 const xmlChar *ptr;
6795 xmlChar cur;
6796 xmlChar *name;
6797 xmlEntityPtr entity = NULL;
6798
6799 if ((str == NULL) || (*str == NULL)) return(NULL);
6800 ptr = *str;
6801 cur = *ptr;
6802 if (cur == '%') {
6803 ptr++;
6804 cur = *ptr;
6805 name = xmlParseStringName(ctxt, &ptr);
6806 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006807 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6808 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006809 } else {
6810 cur = *ptr;
6811 if (cur == ';') {
6812 ptr++;
6813 cur = *ptr;
6814 if ((ctxt->sax != NULL) &&
6815 (ctxt->sax->getParameterEntity != NULL))
6816 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6817 name);
6818 if (entity == NULL) {
6819 /*
6820 * [ WFC: Entity Declared ]
6821 * In a document without any DTD, a document with only an
6822 * internal DTD subset which contains no parameter entity
6823 * references, or a document with "standalone='yes'", ...
6824 * ... The declaration of a parameter entity must precede
6825 * any reference to it...
6826 */
6827 if ((ctxt->standalone == 1) ||
6828 ((ctxt->hasExternalSubset == 0) &&
6829 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006830 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006831 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006832 } else {
6833 /*
6834 * [ VC: Entity Declared ]
6835 * In a document with an external subset or external
6836 * parameter entities with "standalone='no'", ...
6837 * ... The declaration of a parameter entity must
6838 * precede any reference to it...
6839 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006840 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6841 "PEReference: %%%s; not found\n",
6842 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006843 ctxt->valid = 0;
6844 }
6845 } else {
6846 /*
6847 * Internal checking in case the entity quest barfed
6848 */
6849 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6850 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006851 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6852 "%%%s; is not a parameter entity\n",
6853 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006854 }
6855 }
6856 ctxt->hasPErefs = 1;
6857 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006858 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006859 }
6860 xmlFree(name);
6861 }
6862 }
6863 *str = ptr;
6864 return(entity);
6865}
6866
6867/**
6868 * xmlParseDocTypeDecl:
6869 * @ctxt: an XML parser context
6870 *
6871 * parse a DOCTYPE declaration
6872 *
6873 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6874 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6875 *
6876 * [ VC: Root Element Type ]
6877 * The Name in the document type declaration must match the element
6878 * type of the root element.
6879 */
6880
6881void
6882xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006883 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006884 xmlChar *ExternalID = NULL;
6885 xmlChar *URI = NULL;
6886
6887 /*
6888 * We know that '<!DOCTYPE' has been detected.
6889 */
6890 SKIP(9);
6891
6892 SKIP_BLANKS;
6893
6894 /*
6895 * Parse the DOCTYPE name.
6896 */
6897 name = xmlParseName(ctxt);
6898 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6900 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006901 }
6902 ctxt->intSubName = name;
6903
6904 SKIP_BLANKS;
6905
6906 /*
6907 * Check for SystemID and ExternalID
6908 */
6909 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6910
6911 if ((URI != NULL) || (ExternalID != NULL)) {
6912 ctxt->hasExternalSubset = 1;
6913 }
6914 ctxt->extSubURI = URI;
6915 ctxt->extSubSystem = ExternalID;
6916
6917 SKIP_BLANKS;
6918
6919 /*
6920 * Create and update the internal subset.
6921 */
6922 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6923 (!ctxt->disableSAX))
6924 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6925
6926 /*
6927 * Is there any internal subset declarations ?
6928 * they are handled separately in xmlParseInternalSubset()
6929 */
6930 if (RAW == '[')
6931 return;
6932
6933 /*
6934 * We should be at the end of the DOCTYPE declaration.
6935 */
6936 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006937 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006938 }
6939 NEXT;
6940}
6941
6942/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006943 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006944 * @ctxt: an XML parser context
6945 *
6946 * parse the internal subset declaration
6947 *
6948 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6949 */
6950
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006951static void
Owen Taylor3473f882001-02-23 17:55:21 +00006952xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6953 /*
6954 * Is there any DTD definition ?
6955 */
6956 if (RAW == '[') {
6957 ctxt->instate = XML_PARSER_DTD;
6958 NEXT;
6959 /*
6960 * Parse the succession of Markup declarations and
6961 * PEReferences.
6962 * Subsequence (markupdecl | PEReference | S)*
6963 */
6964 while (RAW != ']') {
6965 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006966 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006967
6968 SKIP_BLANKS;
6969 xmlParseMarkupDecl(ctxt);
6970 xmlParsePEReference(ctxt);
6971
6972 /*
6973 * Pop-up of finished entities.
6974 */
6975 while ((RAW == 0) && (ctxt->inputNr > 1))
6976 xmlPopInput(ctxt);
6977
6978 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006979 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006980 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006981 break;
6982 }
6983 }
6984 if (RAW == ']') {
6985 NEXT;
6986 SKIP_BLANKS;
6987 }
6988 }
6989
6990 /*
6991 * We should be at the end of the DOCTYPE declaration.
6992 */
6993 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006994 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006995 }
6996 NEXT;
6997}
6998
Daniel Veillard81273902003-09-30 00:43:48 +00006999#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007000/**
7001 * xmlParseAttribute:
7002 * @ctxt: an XML parser context
7003 * @value: a xmlChar ** used to store the value of the attribute
7004 *
7005 * parse an attribute
7006 *
7007 * [41] Attribute ::= Name Eq AttValue
7008 *
7009 * [ WFC: No External Entity References ]
7010 * Attribute values cannot contain direct or indirect entity references
7011 * to external entities.
7012 *
7013 * [ WFC: No < in Attribute Values ]
7014 * The replacement text of any entity referred to directly or indirectly in
7015 * an attribute value (other than "&lt;") must not contain a <.
7016 *
7017 * [ VC: Attribute Value Type ]
7018 * The attribute must have been declared; the value must be of the type
7019 * declared for it.
7020 *
7021 * [25] Eq ::= S? '=' S?
7022 *
7023 * With namespace:
7024 *
7025 * [NS 11] Attribute ::= QName Eq AttValue
7026 *
7027 * Also the case QName == xmlns:??? is handled independently as a namespace
7028 * definition.
7029 *
7030 * Returns the attribute name, and the value in *value.
7031 */
7032
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007033const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007034xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007035 const xmlChar *name;
7036 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007037
7038 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007039 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007040 name = xmlParseName(ctxt);
7041 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007042 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007043 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007044 return(NULL);
7045 }
7046
7047 /*
7048 * read the value
7049 */
7050 SKIP_BLANKS;
7051 if (RAW == '=') {
7052 NEXT;
7053 SKIP_BLANKS;
7054 val = xmlParseAttValue(ctxt);
7055 ctxt->instate = XML_PARSER_CONTENT;
7056 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007057 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007058 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007059 return(NULL);
7060 }
7061
7062 /*
7063 * Check that xml:lang conforms to the specification
7064 * No more registered as an error, just generate a warning now
7065 * since this was deprecated in XML second edition
7066 */
7067 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7068 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007069 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7070 "Malformed value for xml:lang : %s\n",
7071 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007072 }
7073 }
7074
7075 /*
7076 * Check that xml:space conforms to the specification
7077 */
7078 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7079 if (xmlStrEqual(val, BAD_CAST "default"))
7080 *(ctxt->space) = 0;
7081 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7082 *(ctxt->space) = 1;
7083 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007084 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007085"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007086 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007087 }
7088 }
7089
7090 *value = val;
7091 return(name);
7092}
7093
7094/**
7095 * xmlParseStartTag:
7096 * @ctxt: an XML parser context
7097 *
7098 * parse a start of tag either for rule element or
7099 * EmptyElement. In both case we don't parse the tag closing chars.
7100 *
7101 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7102 *
7103 * [ WFC: Unique Att Spec ]
7104 * No attribute name may appear more than once in the same start-tag or
7105 * empty-element tag.
7106 *
7107 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7108 *
7109 * [ WFC: Unique Att Spec ]
7110 * No attribute name may appear more than once in the same start-tag or
7111 * empty-element tag.
7112 *
7113 * With namespace:
7114 *
7115 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7116 *
7117 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7118 *
7119 * Returns the element name parsed
7120 */
7121
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007122const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007123xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007124 const xmlChar *name;
7125 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007126 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007127 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007128 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007129 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007130 int i;
7131
7132 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007133 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007134
7135 name = xmlParseName(ctxt);
7136 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007137 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007138 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007139 return(NULL);
7140 }
7141
7142 /*
7143 * Now parse the attributes, it ends up with the ending
7144 *
7145 * (S Attribute)* S?
7146 */
7147 SKIP_BLANKS;
7148 GROW;
7149
Daniel Veillard21a0f912001-02-25 19:54:14 +00007150 while ((RAW != '>') &&
7151 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007152 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007153 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007154 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007155
7156 attname = xmlParseAttribute(ctxt, &attvalue);
7157 if ((attname != NULL) && (attvalue != NULL)) {
7158 /*
7159 * [ WFC: Unique Att Spec ]
7160 * No attribute name may appear more than once in the same
7161 * start-tag or empty-element tag.
7162 */
7163 for (i = 0; i < nbatts;i += 2) {
7164 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007165 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007166 xmlFree(attvalue);
7167 goto failed;
7168 }
7169 }
Owen Taylor3473f882001-02-23 17:55:21 +00007170 /*
7171 * Add the pair to atts
7172 */
7173 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007174 maxatts = 22; /* allow for 10 attrs by default */
7175 atts = (const xmlChar **)
7176 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007177 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007178 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007179 if (attvalue != NULL)
7180 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007181 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007182 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007183 ctxt->atts = atts;
7184 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007185 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007186 const xmlChar **n;
7187
Owen Taylor3473f882001-02-23 17:55:21 +00007188 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007189 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007190 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007191 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007192 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007193 if (attvalue != NULL)
7194 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007195 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007196 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007197 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007198 ctxt->atts = atts;
7199 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007200 }
7201 atts[nbatts++] = attname;
7202 atts[nbatts++] = attvalue;
7203 atts[nbatts] = NULL;
7204 atts[nbatts + 1] = NULL;
7205 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007206 if (attvalue != NULL)
7207 xmlFree(attvalue);
7208 }
7209
7210failed:
7211
Daniel Veillard3772de32002-12-17 10:31:45 +00007212 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007213 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7214 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007215 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007216 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7217 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007218 }
7219 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007220 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7221 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007222 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7223 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007224 break;
7225 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007226 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007227 GROW;
7228 }
7229
7230 /*
7231 * SAX: Start of Element !
7232 */
7233 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007234 (!ctxt->disableSAX)) {
7235 if (nbatts > 0)
7236 ctxt->sax->startElement(ctxt->userData, name, atts);
7237 else
7238 ctxt->sax->startElement(ctxt->userData, name, NULL);
7239 }
Owen Taylor3473f882001-02-23 17:55:21 +00007240
7241 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007242 /* Free only the content strings */
7243 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007244 if (atts[i] != NULL)
7245 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007246 }
7247 return(name);
7248}
7249
7250/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007251 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007252 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007253 * @line: line of the start tag
7254 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007255 *
7256 * parse an end of tag
7257 *
7258 * [42] ETag ::= '</' Name S? '>'
7259 *
7260 * With namespace
7261 *
7262 * [NS 9] ETag ::= '</' QName S? '>'
7263 */
7264
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007265static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007267 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007268
7269 GROW;
7270 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007271 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007272 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007273 return;
7274 }
7275 SKIP(2);
7276
Daniel Veillard46de64e2002-05-29 08:21:33 +00007277 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007278
7279 /*
7280 * We should definitely be at the ending "S? '>'" part
7281 */
7282 GROW;
7283 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007284 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007285 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007286 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007287 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007288
7289 /*
7290 * [ WFC: Element Type Match ]
7291 * The Name in an element's end-tag must match the element type in the
7292 * start-tag.
7293 *
7294 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007295 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007296 if (name == NULL) name = BAD_CAST "unparseable";
7297 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007298 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007299 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007300 }
7301
7302 /*
7303 * SAX: End of Tag
7304 */
7305 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7306 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007307 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007308
Daniel Veillarde57ec792003-09-10 10:50:59 +00007309 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007310 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007311 return;
7312}
7313
7314/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007315 * xmlParseEndTag:
7316 * @ctxt: an XML parser context
7317 *
7318 * parse an end of tag
7319 *
7320 * [42] ETag ::= '</' Name S? '>'
7321 *
7322 * With namespace
7323 *
7324 * [NS 9] ETag ::= '</' QName S? '>'
7325 */
7326
7327void
7328xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007329 xmlParseEndTag1(ctxt, 0);
7330}
Daniel Veillard81273902003-09-30 00:43:48 +00007331#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007332
7333/************************************************************************
7334 * *
7335 * SAX 2 specific operations *
7336 * *
7337 ************************************************************************/
7338
7339static const xmlChar *
7340xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7341 int len = 0, l;
7342 int c;
7343 int count = 0;
7344
7345 /*
7346 * Handler for more complex cases
7347 */
7348 GROW;
7349 c = CUR_CHAR(l);
7350 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007351 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007352 return(NULL);
7353 }
7354
7355 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007356 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007357 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007358 (IS_COMBINING(c)) ||
7359 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007360 if (count++ > 100) {
7361 count = 0;
7362 GROW;
7363 }
7364 len += l;
7365 NEXTL(l);
7366 c = CUR_CHAR(l);
7367 }
7368 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7369}
7370
7371/*
7372 * xmlGetNamespace:
7373 * @ctxt: an XML parser context
7374 * @prefix: the prefix to lookup
7375 *
7376 * Lookup the namespace name for the @prefix (which ca be NULL)
7377 * The prefix must come from the @ctxt->dict dictionnary
7378 *
7379 * Returns the namespace name or NULL if not bound
7380 */
7381static const xmlChar *
7382xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7383 int i;
7384
Daniel Veillarde57ec792003-09-10 10:50:59 +00007385 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007386 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007387 if (ctxt->nsTab[i] == prefix) {
7388 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7389 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007390 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007391 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007392 return(NULL);
7393}
7394
7395/**
7396 * xmlParseNCName:
7397 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007398 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007399 *
7400 * parse an XML name.
7401 *
7402 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7403 * CombiningChar | Extender
7404 *
7405 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7406 *
7407 * Returns the Name parsed or NULL
7408 */
7409
7410static const xmlChar *
7411xmlParseNCName(xmlParserCtxtPtr ctxt) {
7412 const xmlChar *in;
7413 const xmlChar *ret;
7414 int count = 0;
7415
7416 /*
7417 * Accelerator for simple ASCII names
7418 */
7419 in = ctxt->input->cur;
7420 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7421 ((*in >= 0x41) && (*in <= 0x5A)) ||
7422 (*in == '_')) {
7423 in++;
7424 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7425 ((*in >= 0x41) && (*in <= 0x5A)) ||
7426 ((*in >= 0x30) && (*in <= 0x39)) ||
7427 (*in == '_') || (*in == '-') ||
7428 (*in == '.'))
7429 in++;
7430 if ((*in > 0) && (*in < 0x80)) {
7431 count = in - ctxt->input->cur;
7432 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7433 ctxt->input->cur = in;
7434 ctxt->nbChars += count;
7435 ctxt->input->col += count;
7436 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007437 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007438 }
7439 return(ret);
7440 }
7441 }
7442 return(xmlParseNCNameComplex(ctxt));
7443}
7444
7445/**
7446 * xmlParseQName:
7447 * @ctxt: an XML parser context
7448 * @prefix: pointer to store the prefix part
7449 *
7450 * parse an XML Namespace QName
7451 *
7452 * [6] QName ::= (Prefix ':')? LocalPart
7453 * [7] Prefix ::= NCName
7454 * [8] LocalPart ::= NCName
7455 *
7456 * Returns the Name parsed or NULL
7457 */
7458
7459static const xmlChar *
7460xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7461 const xmlChar *l, *p;
7462
7463 GROW;
7464
7465 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007466 if (l == NULL) {
7467 if (CUR == ':') {
7468 l = xmlParseName(ctxt);
7469 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007470 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7471 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007472 *prefix = NULL;
7473 return(l);
7474 }
7475 }
7476 return(NULL);
7477 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007478 if (CUR == ':') {
7479 NEXT;
7480 p = l;
7481 l = xmlParseNCName(ctxt);
7482 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007483 xmlChar *tmp;
7484
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007485 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7486 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007487 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7488 p = xmlDictLookup(ctxt->dict, tmp, -1);
7489 if (tmp != NULL) xmlFree(tmp);
7490 *prefix = NULL;
7491 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007492 }
7493 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007494 xmlChar *tmp;
7495
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007496 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7497 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007498 NEXT;
7499 tmp = (xmlChar *) xmlParseName(ctxt);
7500 if (tmp != NULL) {
7501 tmp = xmlBuildQName(tmp, l, NULL, 0);
7502 l = xmlDictLookup(ctxt->dict, tmp, -1);
7503 if (tmp != NULL) xmlFree(tmp);
7504 *prefix = p;
7505 return(l);
7506 }
7507 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7508 l = xmlDictLookup(ctxt->dict, tmp, -1);
7509 if (tmp != NULL) xmlFree(tmp);
7510 *prefix = p;
7511 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007512 }
7513 *prefix = p;
7514 } else
7515 *prefix = NULL;
7516 return(l);
7517}
7518
7519/**
7520 * xmlParseQNameAndCompare:
7521 * @ctxt: an XML parser context
7522 * @name: the localname
7523 * @prefix: the prefix, if any.
7524 *
7525 * parse an XML name and compares for match
7526 * (specialized for endtag parsing)
7527 *
7528 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7529 * and the name for mismatch
7530 */
7531
7532static const xmlChar *
7533xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7534 xmlChar const *prefix) {
7535 const xmlChar *cmp = name;
7536 const xmlChar *in;
7537 const xmlChar *ret;
7538 const xmlChar *prefix2;
7539
7540 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7541
7542 GROW;
7543 in = ctxt->input->cur;
7544
7545 cmp = prefix;
7546 while (*in != 0 && *in == *cmp) {
7547 ++in;
7548 ++cmp;
7549 }
7550 if ((*cmp == 0) && (*in == ':')) {
7551 in++;
7552 cmp = name;
7553 while (*in != 0 && *in == *cmp) {
7554 ++in;
7555 ++cmp;
7556 }
William M. Brack76e95df2003-10-18 16:20:14 +00007557 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007558 /* success */
7559 ctxt->input->cur = in;
7560 return((const xmlChar*) 1);
7561 }
7562 }
7563 /*
7564 * all strings coms from the dictionary, equality can be done directly
7565 */
7566 ret = xmlParseQName (ctxt, &prefix2);
7567 if ((ret == name) && (prefix == prefix2))
7568 return((const xmlChar*) 1);
7569 return ret;
7570}
7571
7572/**
7573 * xmlParseAttValueInternal:
7574 * @ctxt: an XML parser context
7575 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007576 * @alloc: whether the attribute was reallocated as a new string
7577 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007578 *
7579 * parse a value for an attribute.
7580 * NOTE: if no normalization is needed, the routine will return pointers
7581 * directly from the data buffer.
7582 *
7583 * 3.3.3 Attribute-Value Normalization:
7584 * Before the value of an attribute is passed to the application or
7585 * checked for validity, the XML processor must normalize it as follows:
7586 * - a character reference is processed by appending the referenced
7587 * character to the attribute value
7588 * - an entity reference is processed by recursively processing the
7589 * replacement text of the entity
7590 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7591 * appending #x20 to the normalized value, except that only a single
7592 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7593 * parsed entity or the literal entity value of an internal parsed entity
7594 * - other characters are processed by appending them to the normalized value
7595 * If the declared value is not CDATA, then the XML processor must further
7596 * process the normalized attribute value by discarding any leading and
7597 * trailing space (#x20) characters, and by replacing sequences of space
7598 * (#x20) characters by a single space (#x20) character.
7599 * All attributes for which no declaration has been read should be treated
7600 * by a non-validating parser as if declared CDATA.
7601 *
7602 * Returns the AttValue parsed or NULL. The value has to be freed by the
7603 * caller if it was copied, this can be detected by val[*len] == 0.
7604 */
7605
7606static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007607xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7608 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007609{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007610 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007611 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007612 xmlChar *ret = NULL;
7613
7614 GROW;
7615 in = (xmlChar *) CUR_PTR;
7616 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007617 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007618 return (NULL);
7619 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007620 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007621
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007622 /*
7623 * try to handle in this routine the most common case where no
7624 * allocation of a new string is required and where content is
7625 * pure ASCII.
7626 */
7627 limit = *in++;
7628 end = ctxt->input->end;
7629 start = in;
7630 if (in >= end) {
7631 const xmlChar *oldbase = ctxt->input->base;
7632 GROW;
7633 if (oldbase != ctxt->input->base) {
7634 long delta = ctxt->input->base - oldbase;
7635 start = start + delta;
7636 in = in + delta;
7637 }
7638 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007639 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007640 if (normalize) {
7641 /*
7642 * Skip any leading spaces
7643 */
7644 while ((in < end) && (*in != limit) &&
7645 ((*in == 0x20) || (*in == 0x9) ||
7646 (*in == 0xA) || (*in == 0xD))) {
7647 in++;
7648 start = in;
7649 if (in >= end) {
7650 const xmlChar *oldbase = ctxt->input->base;
7651 GROW;
7652 if (oldbase != ctxt->input->base) {
7653 long delta = ctxt->input->base - oldbase;
7654 start = start + delta;
7655 in = in + delta;
7656 }
7657 end = ctxt->input->end;
7658 }
7659 }
7660 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7661 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7662 if ((*in++ == 0x20) && (*in == 0x20)) break;
7663 if (in >= end) {
7664 const xmlChar *oldbase = ctxt->input->base;
7665 GROW;
7666 if (oldbase != ctxt->input->base) {
7667 long delta = ctxt->input->base - oldbase;
7668 start = start + delta;
7669 in = in + delta;
7670 }
7671 end = ctxt->input->end;
7672 }
7673 }
7674 last = in;
7675 /*
7676 * skip the trailing blanks
7677 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007678 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007679 while ((in < end) && (*in != limit) &&
7680 ((*in == 0x20) || (*in == 0x9) ||
7681 (*in == 0xA) || (*in == 0xD))) {
7682 in++;
7683 if (in >= end) {
7684 const xmlChar *oldbase = ctxt->input->base;
7685 GROW;
7686 if (oldbase != ctxt->input->base) {
7687 long delta = ctxt->input->base - oldbase;
7688 start = start + delta;
7689 in = in + delta;
7690 last = last + delta;
7691 }
7692 end = ctxt->input->end;
7693 }
7694 }
7695 if (*in != limit) goto need_complex;
7696 } else {
7697 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7698 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7699 in++;
7700 if (in >= end) {
7701 const xmlChar *oldbase = ctxt->input->base;
7702 GROW;
7703 if (oldbase != ctxt->input->base) {
7704 long delta = ctxt->input->base - oldbase;
7705 start = start + delta;
7706 in = in + delta;
7707 }
7708 end = ctxt->input->end;
7709 }
7710 }
7711 last = in;
7712 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007713 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007714 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007715 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007716 *len = last - start;
7717 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007718 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007719 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007720 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007721 }
7722 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007723 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007724 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007725need_complex:
7726 if (alloc) *alloc = 1;
7727 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007728}
7729
7730/**
7731 * xmlParseAttribute2:
7732 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007733 * @pref: the element prefix
7734 * @elem: the element name
7735 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007737 * @len: an int * to save the length of the attribute
7738 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007739 *
7740 * parse an attribute in the new SAX2 framework.
7741 *
7742 * Returns the attribute name, and the value in *value, .
7743 */
7744
7745static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007746xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7747 const xmlChar *pref, const xmlChar *elem,
7748 const xmlChar **prefix, xmlChar **value,
7749 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007751 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007752 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007753
7754 *value = NULL;
7755 GROW;
7756 name = xmlParseQName(ctxt, prefix);
7757 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007758 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7759 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007760 return(NULL);
7761 }
7762
7763 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007764 * get the type if needed
7765 */
7766 if (ctxt->attsSpecial != NULL) {
7767 int type;
7768
7769 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7770 pref, elem, *prefix, name);
7771 if (type != 0) normalize = 1;
7772 }
7773
7774 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007775 * read the value
7776 */
7777 SKIP_BLANKS;
7778 if (RAW == '=') {
7779 NEXT;
7780 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007781 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007782 ctxt->instate = XML_PARSER_CONTENT;
7783 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007784 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007785 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007786 return(NULL);
7787 }
7788
Daniel Veillardd8925572005-06-08 22:34:55 +00007789 if (*prefix == ctxt->str_xml) {
7790 /*
7791 * Check that xml:lang conforms to the specification
7792 * No more registered as an error, just generate a warning now
7793 * since this was deprecated in XML second edition
7794 */
7795 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7796 internal_val = xmlStrndup(val, *len);
7797 if (!xmlCheckLanguageID(internal_val)) {
7798 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7799 "Malformed value for xml:lang : %s\n",
7800 internal_val, NULL);
7801 }
7802 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007803
Daniel Veillardd8925572005-06-08 22:34:55 +00007804 /*
7805 * Check that xml:space conforms to the specification
7806 */
7807 if (xmlStrEqual(name, BAD_CAST "space")) {
7808 internal_val = xmlStrndup(val, *len);
7809 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7810 *(ctxt->space) = 0;
7811 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7812 *(ctxt->space) = 1;
7813 else {
7814 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007815"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007816 internal_val, NULL);
7817 }
7818 }
7819 if (internal_val) {
7820 xmlFree(internal_val);
7821 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007822 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007823
7824 *value = val;
7825 return(name);
7826}
7827
7828/**
7829 * xmlParseStartTag2:
7830 * @ctxt: an XML parser context
7831 *
7832 * parse a start of tag either for rule element or
7833 * EmptyElement. In both case we don't parse the tag closing chars.
7834 * This routine is called when running SAX2 parsing
7835 *
7836 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7837 *
7838 * [ WFC: Unique Att Spec ]
7839 * No attribute name may appear more than once in the same start-tag or
7840 * empty-element tag.
7841 *
7842 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7843 *
7844 * [ WFC: Unique Att Spec ]
7845 * No attribute name may appear more than once in the same start-tag or
7846 * empty-element tag.
7847 *
7848 * With namespace:
7849 *
7850 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7851 *
7852 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7853 *
7854 * Returns the element name parsed
7855 */
7856
7857static const xmlChar *
7858xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007859 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007860 const xmlChar *localname;
7861 const xmlChar *prefix;
7862 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007863 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007864 const xmlChar *nsname;
7865 xmlChar *attvalue;
7866 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007867 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007868 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007869 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007870 const xmlChar *base;
7871 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007872 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007873
7874 if (RAW != '<') return(NULL);
7875 NEXT1;
7876
7877 /*
7878 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7879 * point since the attribute values may be stored as pointers to
7880 * the buffer and calling SHRINK would destroy them !
7881 * The Shrinking is only possible once the full set of attribute
7882 * callbacks have been done.
7883 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007884reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007885 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007886 base = ctxt->input->base;
7887 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007888 oldline = ctxt->input->line;
7889 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007890 nbatts = 0;
7891 nratts = 0;
7892 nbdef = 0;
7893 nbNs = 0;
7894 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007895 /* Forget any namespaces added during an earlier parse of this element. */
7896 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007897
7898 localname = xmlParseQName(ctxt, &prefix);
7899 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007900 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7901 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007902 return(NULL);
7903 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007904 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007905
7906 /*
7907 * Now parse the attributes, it ends up with the ending
7908 *
7909 * (S Attribute)* S?
7910 */
7911 SKIP_BLANKS;
7912 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007913 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007914
7915 while ((RAW != '>') &&
7916 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007917 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007918 const xmlChar *q = CUR_PTR;
7919 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007920 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007921
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007922 attname = xmlParseAttribute2(ctxt, prefix, localname,
7923 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007924 if (ctxt->input->base != base) {
7925 if ((attvalue != NULL) && (alloc != 0))
7926 xmlFree(attvalue);
7927 attvalue = NULL;
7928 goto base_changed;
7929 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007930 if ((attname != NULL) && (attvalue != NULL)) {
7931 if (len < 0) len = xmlStrlen(attvalue);
7932 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007933 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7934 xmlURIPtr uri;
7935
7936 if (*URL != 0) {
7937 uri = xmlParseURI((const char *) URL);
7938 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007939 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7940 "xmlns: %s not a valid URI\n",
7941 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007942 } else {
7943 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007944 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7945 "xmlns: URI %s is not absolute\n",
7946 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007947 }
7948 xmlFreeURI(uri);
7949 }
7950 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007951 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007952 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007953 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007954 for (j = 1;j <= nbNs;j++)
7955 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7956 break;
7957 if (j <= nbNs)
7958 xmlErrAttributeDup(ctxt, NULL, attname);
7959 else
7960 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007961 if (alloc != 0) xmlFree(attvalue);
7962 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007963 continue;
7964 }
7965 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007966 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7967 xmlURIPtr uri;
7968
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007969 if (attname == ctxt->str_xml) {
7970 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007971 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7972 "xml namespace prefix mapped to wrong URI\n",
7973 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007974 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007975 /*
7976 * Do not keep a namespace definition node
7977 */
7978 if (alloc != 0) xmlFree(attvalue);
7979 SKIP_BLANKS;
7980 continue;
7981 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007982 uri = xmlParseURI((const char *) URL);
7983 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007984 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7985 "xmlns:%s: '%s' is not a valid URI\n",
7986 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007987 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007988 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007989 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7990 "xmlns:%s: URI %s is not absolute\n",
7991 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007992 }
7993 xmlFreeURI(uri);
7994 }
7995
Daniel Veillard0fb18932003-09-07 09:14:37 +00007996 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007997 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007998 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007999 for (j = 1;j <= nbNs;j++)
8000 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8001 break;
8002 if (j <= nbNs)
8003 xmlErrAttributeDup(ctxt, aprefix, attname);
8004 else
8005 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008006 if (alloc != 0) xmlFree(attvalue);
8007 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008008 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008009 continue;
8010 }
8011
8012 /*
8013 * Add the pair to atts
8014 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008015 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8016 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017 if (attvalue[len] == 0)
8018 xmlFree(attvalue);
8019 goto failed;
8020 }
8021 maxatts = ctxt->maxatts;
8022 atts = ctxt->atts;
8023 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008024 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008025 atts[nbatts++] = attname;
8026 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008027 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008028 atts[nbatts++] = attvalue;
8029 attvalue += len;
8030 atts[nbatts++] = attvalue;
8031 /*
8032 * tag if some deallocation is needed
8033 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008034 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008035 } else {
8036 if ((attvalue != NULL) && (attvalue[len] == 0))
8037 xmlFree(attvalue);
8038 }
8039
8040failed:
8041
8042 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008043 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008044 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8045 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008046 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008047 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8048 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008049 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008050 }
8051 SKIP_BLANKS;
8052 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8053 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008054 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008055 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008056 break;
8057 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008058 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008059 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008060 }
8061
Daniel Veillard0fb18932003-09-07 09:14:37 +00008062 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008063 * The attributes defaulting
8064 */
8065 if (ctxt->attsDefault != NULL) {
8066 xmlDefAttrsPtr defaults;
8067
8068 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8069 if (defaults != NULL) {
8070 for (i = 0;i < defaults->nbAttrs;i++) {
8071 attname = defaults->values[4 * i];
8072 aprefix = defaults->values[4 * i + 1];
8073
8074 /*
8075 * special work for namespaces defaulted defs
8076 */
8077 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8078 /*
8079 * check that it's not a defined namespace
8080 */
8081 for (j = 1;j <= nbNs;j++)
8082 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8083 break;
8084 if (j <= nbNs) continue;
8085
8086 nsname = xmlGetNamespace(ctxt, NULL);
8087 if (nsname != defaults->values[4 * i + 2]) {
8088 if (nsPush(ctxt, NULL,
8089 defaults->values[4 * i + 2]) > 0)
8090 nbNs++;
8091 }
8092 } else if (aprefix == ctxt->str_xmlns) {
8093 /*
8094 * check that it's not a defined namespace
8095 */
8096 for (j = 1;j <= nbNs;j++)
8097 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8098 break;
8099 if (j <= nbNs) continue;
8100
8101 nsname = xmlGetNamespace(ctxt, attname);
8102 if (nsname != defaults->values[2]) {
8103 if (nsPush(ctxt, attname,
8104 defaults->values[4 * i + 2]) > 0)
8105 nbNs++;
8106 }
8107 } else {
8108 /*
8109 * check that it's not a defined attribute
8110 */
8111 for (j = 0;j < nbatts;j+=5) {
8112 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8113 break;
8114 }
8115 if (j < nbatts) continue;
8116
8117 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8118 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008119 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008120 }
8121 maxatts = ctxt->maxatts;
8122 atts = ctxt->atts;
8123 }
8124 atts[nbatts++] = attname;
8125 atts[nbatts++] = aprefix;
8126 if (aprefix == NULL)
8127 atts[nbatts++] = NULL;
8128 else
8129 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8130 atts[nbatts++] = defaults->values[4 * i + 2];
8131 atts[nbatts++] = defaults->values[4 * i + 3];
8132 nbdef++;
8133 }
8134 }
8135 }
8136 }
8137
Daniel Veillarde70c8772003-11-25 07:21:18 +00008138 /*
8139 * The attributes checkings
8140 */
8141 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008142 /*
8143 * The default namespace does not apply to attribute names.
8144 */
8145 if (atts[i + 1] != NULL) {
8146 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8147 if (nsname == NULL) {
8148 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8149 "Namespace prefix %s for %s on %s is not defined\n",
8150 atts[i + 1], atts[i], localname);
8151 }
8152 atts[i + 2] = nsname;
8153 } else
8154 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008155 /*
8156 * [ WFC: Unique Att Spec ]
8157 * No attribute name may appear more than once in the same
8158 * start-tag or empty-element tag.
8159 * As extended by the Namespace in XML REC.
8160 */
8161 for (j = 0; j < i;j += 5) {
8162 if (atts[i] == atts[j]) {
8163 if (atts[i+1] == atts[j+1]) {
8164 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8165 break;
8166 }
8167 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8168 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8169 "Namespaced Attribute %s in '%s' redefined\n",
8170 atts[i], nsname, NULL);
8171 break;
8172 }
8173 }
8174 }
8175 }
8176
Daniel Veillarde57ec792003-09-10 10:50:59 +00008177 nsname = xmlGetNamespace(ctxt, prefix);
8178 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008179 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8180 "Namespace prefix %s on %s is not defined\n",
8181 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008182 }
8183 *pref = prefix;
8184 *URI = nsname;
8185
8186 /*
8187 * SAX: Start of Element !
8188 */
8189 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8190 (!ctxt->disableSAX)) {
8191 if (nbNs > 0)
8192 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8193 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8194 nbatts / 5, nbdef, atts);
8195 else
8196 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8197 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8198 }
8199
8200 /*
8201 * Free up attribute allocated strings if needed
8202 */
8203 if (attval != 0) {
8204 for (i = 3,j = 0; j < nratts;i += 5,j++)
8205 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8206 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 }
8208
8209 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008210
8211base_changed:
8212 /*
8213 * the attribute strings are valid iif the base didn't changed
8214 */
8215 if (attval != 0) {
8216 for (i = 3,j = 0; j < nratts;i += 5,j++)
8217 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8218 xmlFree((xmlChar *) atts[i]);
8219 }
8220 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008221 ctxt->input->line = oldline;
8222 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008223 if (ctxt->wellFormed == 1) {
8224 goto reparse;
8225 }
8226 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008227}
8228
8229/**
8230 * xmlParseEndTag2:
8231 * @ctxt: an XML parser context
8232 * @line: line of the start tag
8233 * @nsNr: number of namespaces on the start tag
8234 *
8235 * parse an end of tag
8236 *
8237 * [42] ETag ::= '</' Name S? '>'
8238 *
8239 * With namespace
8240 *
8241 * [NS 9] ETag ::= '</' QName S? '>'
8242 */
8243
8244static void
8245xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008246 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008247 const xmlChar *name;
8248
8249 GROW;
8250 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008251 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008252 return;
8253 }
8254 SKIP(2);
8255
William M. Brack13dfa872004-09-18 04:52:08 +00008256 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008257 if (ctxt->input->cur[tlen] == '>') {
8258 ctxt->input->cur += tlen + 1;
8259 goto done;
8260 }
8261 ctxt->input->cur += tlen;
8262 name = (xmlChar*)1;
8263 } else {
8264 if (prefix == NULL)
8265 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8266 else
8267 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8268 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008269
8270 /*
8271 * We should definitely be at the ending "S? '>'" part
8272 */
8273 GROW;
8274 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008275 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008276 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008277 } else
8278 NEXT1;
8279
8280 /*
8281 * [ WFC: Element Type Match ]
8282 * The Name in an element's end-tag must match the element type in the
8283 * start-tag.
8284 *
8285 */
8286 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008287 if (name == NULL) name = BAD_CAST "unparseable";
8288 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008289 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008290 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008291 }
8292
8293 /*
8294 * SAX: End of Tag
8295 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008296done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008297 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8298 (!ctxt->disableSAX))
8299 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8300
Daniel Veillard0fb18932003-09-07 09:14:37 +00008301 spacePop(ctxt);
8302 if (nsNr != 0)
8303 nsPop(ctxt, nsNr);
8304 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008305}
8306
8307/**
Owen Taylor3473f882001-02-23 17:55:21 +00008308 * xmlParseCDSect:
8309 * @ctxt: an XML parser context
8310 *
8311 * Parse escaped pure raw content.
8312 *
8313 * [18] CDSect ::= CDStart CData CDEnd
8314 *
8315 * [19] CDStart ::= '<![CDATA['
8316 *
8317 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8318 *
8319 * [21] CDEnd ::= ']]>'
8320 */
8321void
8322xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8323 xmlChar *buf = NULL;
8324 int len = 0;
8325 int size = XML_PARSER_BUFFER_SIZE;
8326 int r, rl;
8327 int s, sl;
8328 int cur, l;
8329 int count = 0;
8330
Daniel Veillard8f597c32003-10-06 08:19:27 +00008331 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008332 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008333 SKIP(9);
8334 } else
8335 return;
8336
8337 ctxt->instate = XML_PARSER_CDATA_SECTION;
8338 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008339 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008340 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008341 ctxt->instate = XML_PARSER_CONTENT;
8342 return;
8343 }
8344 NEXTL(rl);
8345 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008346 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008347 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008348 ctxt->instate = XML_PARSER_CONTENT;
8349 return;
8350 }
8351 NEXTL(sl);
8352 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008353 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008354 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008355 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008356 return;
8357 }
William M. Brack871611b2003-10-18 04:53:14 +00008358 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008359 ((r != ']') || (s != ']') || (cur != '>'))) {
8360 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008361 xmlChar *tmp;
8362
Owen Taylor3473f882001-02-23 17:55:21 +00008363 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008364 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8365 if (tmp == NULL) {
8366 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008367 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008368 return;
8369 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008370 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008371 }
8372 COPY_BUF(rl,buf,len,r);
8373 r = s;
8374 rl = sl;
8375 s = cur;
8376 sl = l;
8377 count++;
8378 if (count > 50) {
8379 GROW;
8380 count = 0;
8381 }
8382 NEXTL(l);
8383 cur = CUR_CHAR(l);
8384 }
8385 buf[len] = 0;
8386 ctxt->instate = XML_PARSER_CONTENT;
8387 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008388 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008389 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008390 xmlFree(buf);
8391 return;
8392 }
8393 NEXTL(l);
8394
8395 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008396 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008397 */
8398 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8399 if (ctxt->sax->cdataBlock != NULL)
8400 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008401 else if (ctxt->sax->characters != NULL)
8402 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008403 }
8404 xmlFree(buf);
8405}
8406
8407/**
8408 * xmlParseContent:
8409 * @ctxt: an XML parser context
8410 *
8411 * Parse a content:
8412 *
8413 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8414 */
8415
8416void
8417xmlParseContent(xmlParserCtxtPtr ctxt) {
8418 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008419 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008420 ((RAW != '<') || (NXT(1) != '/')) &&
8421 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008422 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008423 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008424 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008425
8426 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008427 * First case : a Processing Instruction.
8428 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008429 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008430 xmlParsePI(ctxt);
8431 }
8432
8433 /*
8434 * Second case : a CDSection
8435 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008436 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008437 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008438 xmlParseCDSect(ctxt);
8439 }
8440
8441 /*
8442 * Third case : a comment
8443 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008444 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008445 (NXT(2) == '-') && (NXT(3) == '-')) {
8446 xmlParseComment(ctxt);
8447 ctxt->instate = XML_PARSER_CONTENT;
8448 }
8449
8450 /*
8451 * Fourth case : a sub-element.
8452 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008453 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008454 xmlParseElement(ctxt);
8455 }
8456
8457 /*
8458 * Fifth case : a reference. If if has not been resolved,
8459 * parsing returns it's Name, create the node
8460 */
8461
Daniel Veillard21a0f912001-02-25 19:54:14 +00008462 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008463 xmlParseReference(ctxt);
8464 }
8465
8466 /*
8467 * Last case, text. Note that References are handled directly.
8468 */
8469 else {
8470 xmlParseCharData(ctxt, 0);
8471 }
8472
8473 GROW;
8474 /*
8475 * Pop-up of finished entities.
8476 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008477 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008478 xmlPopInput(ctxt);
8479 SHRINK;
8480
Daniel Veillardfdc91562002-07-01 21:52:03 +00008481 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008482 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8483 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008484 ctxt->instate = XML_PARSER_EOF;
8485 break;
8486 }
8487 }
8488}
8489
8490/**
8491 * xmlParseElement:
8492 * @ctxt: an XML parser context
8493 *
8494 * parse an XML element, this is highly recursive
8495 *
8496 * [39] element ::= EmptyElemTag | STag content ETag
8497 *
8498 * [ WFC: Element Type Match ]
8499 * The Name in an element's end-tag must match the element type in the
8500 * start-tag.
8501 *
Owen Taylor3473f882001-02-23 17:55:21 +00008502 */
8503
8504void
8505xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008506 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008507 const xmlChar *prefix;
8508 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008509 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008510 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008511 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008512 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008513
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008514 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8515 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8516 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8517 xmlParserMaxDepth);
8518 ctxt->instate = XML_PARSER_EOF;
8519 return;
8520 }
8521
Owen Taylor3473f882001-02-23 17:55:21 +00008522 /* Capture start position */
8523 if (ctxt->record_info) {
8524 node_info.begin_pos = ctxt->input->consumed +
8525 (CUR_PTR - ctxt->input->base);
8526 node_info.begin_line = ctxt->input->line;
8527 }
8528
8529 if (ctxt->spaceNr == 0)
8530 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008531 else if (*ctxt->space == -2)
8532 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008533 else
8534 spacePush(ctxt, *ctxt->space);
8535
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008536 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008537#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008538 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008539#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008540 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008541#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008542 else
8543 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008544#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008545 if (name == NULL) {
8546 spacePop(ctxt);
8547 return;
8548 }
8549 namePush(ctxt, name);
8550 ret = ctxt->node;
8551
Daniel Veillard4432df22003-09-28 18:58:27 +00008552#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008553 /*
8554 * [ VC: Root Element Type ]
8555 * The Name in the document type declaration must match the element
8556 * type of the root element.
8557 */
8558 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8559 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8560 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008561#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008562
8563 /*
8564 * Check for an Empty Element.
8565 */
8566 if ((RAW == '/') && (NXT(1) == '>')) {
8567 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008568 if (ctxt->sax2) {
8569 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8570 (!ctxt->disableSAX))
8571 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008572#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573 } else {
8574 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8575 (!ctxt->disableSAX))
8576 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008577#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008578 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579 namePop(ctxt);
8580 spacePop(ctxt);
8581 if (nsNr != ctxt->nsNr)
8582 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008583 if ( ret != NULL && ctxt->record_info ) {
8584 node_info.end_pos = ctxt->input->consumed +
8585 (CUR_PTR - ctxt->input->base);
8586 node_info.end_line = ctxt->input->line;
8587 node_info.node = ret;
8588 xmlParserAddNodeInfo(ctxt, &node_info);
8589 }
8590 return;
8591 }
8592 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008593 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008594 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008595 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8596 "Couldn't find end of Start Tag %s line %d\n",
8597 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008598
8599 /*
8600 * end of parsing of this node.
8601 */
8602 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008603 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008604 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008605 if (nsNr != ctxt->nsNr)
8606 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008607
8608 /*
8609 * Capture end position and add node
8610 */
8611 if ( ret != NULL && ctxt->record_info ) {
8612 node_info.end_pos = ctxt->input->consumed +
8613 (CUR_PTR - ctxt->input->base);
8614 node_info.end_line = ctxt->input->line;
8615 node_info.node = ret;
8616 xmlParserAddNodeInfo(ctxt, &node_info);
8617 }
8618 return;
8619 }
8620
8621 /*
8622 * Parse the content of the element:
8623 */
8624 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008625 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008626 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008627 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008628 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008629
8630 /*
8631 * end of parsing of this node.
8632 */
8633 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008634 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008635 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008636 if (nsNr != ctxt->nsNr)
8637 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008638 return;
8639 }
8640
8641 /*
8642 * parse the end of tag: '</' should be here.
8643 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008644 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008645 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008646 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008647 }
8648#ifdef LIBXML_SAX1_ENABLED
8649 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008650 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008651#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008652
8653 /*
8654 * Capture end position and add node
8655 */
8656 if ( ret != NULL && ctxt->record_info ) {
8657 node_info.end_pos = ctxt->input->consumed +
8658 (CUR_PTR - ctxt->input->base);
8659 node_info.end_line = ctxt->input->line;
8660 node_info.node = ret;
8661 xmlParserAddNodeInfo(ctxt, &node_info);
8662 }
8663}
8664
8665/**
8666 * xmlParseVersionNum:
8667 * @ctxt: an XML parser context
8668 *
8669 * parse the XML version value.
8670 *
8671 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8672 *
8673 * Returns the string giving the XML version number, or NULL
8674 */
8675xmlChar *
8676xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8677 xmlChar *buf = NULL;
8678 int len = 0;
8679 int size = 10;
8680 xmlChar cur;
8681
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008682 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008683 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008684 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008685 return(NULL);
8686 }
8687 cur = CUR;
8688 while (((cur >= 'a') && (cur <= 'z')) ||
8689 ((cur >= 'A') && (cur <= 'Z')) ||
8690 ((cur >= '0') && (cur <= '9')) ||
8691 (cur == '_') || (cur == '.') ||
8692 (cur == ':') || (cur == '-')) {
8693 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008694 xmlChar *tmp;
8695
Owen Taylor3473f882001-02-23 17:55:21 +00008696 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008697 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8698 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008699 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008700 return(NULL);
8701 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008702 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008703 }
8704 buf[len++] = cur;
8705 NEXT;
8706 cur=CUR;
8707 }
8708 buf[len] = 0;
8709 return(buf);
8710}
8711
8712/**
8713 * xmlParseVersionInfo:
8714 * @ctxt: an XML parser context
8715 *
8716 * parse the XML version.
8717 *
8718 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8719 *
8720 * [25] Eq ::= S? '=' S?
8721 *
8722 * Returns the version string, e.g. "1.0"
8723 */
8724
8725xmlChar *
8726xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8727 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008728
Daniel Veillarda07050d2003-10-19 14:46:32 +00008729 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008730 SKIP(7);
8731 SKIP_BLANKS;
8732 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008733 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008734 return(NULL);
8735 }
8736 NEXT;
8737 SKIP_BLANKS;
8738 if (RAW == '"') {
8739 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008740 version = xmlParseVersionNum(ctxt);
8741 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008742 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008743 } else
8744 NEXT;
8745 } else if (RAW == '\''){
8746 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008747 version = xmlParseVersionNum(ctxt);
8748 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008749 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008750 } else
8751 NEXT;
8752 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008753 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008754 }
8755 }
8756 return(version);
8757}
8758
8759/**
8760 * xmlParseEncName:
8761 * @ctxt: an XML parser context
8762 *
8763 * parse the XML encoding name
8764 *
8765 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8766 *
8767 * Returns the encoding name value or NULL
8768 */
8769xmlChar *
8770xmlParseEncName(xmlParserCtxtPtr ctxt) {
8771 xmlChar *buf = NULL;
8772 int len = 0;
8773 int size = 10;
8774 xmlChar cur;
8775
8776 cur = CUR;
8777 if (((cur >= 'a') && (cur <= 'z')) ||
8778 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008779 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008780 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008781 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008782 return(NULL);
8783 }
8784
8785 buf[len++] = cur;
8786 NEXT;
8787 cur = CUR;
8788 while (((cur >= 'a') && (cur <= 'z')) ||
8789 ((cur >= 'A') && (cur <= 'Z')) ||
8790 ((cur >= '0') && (cur <= '9')) ||
8791 (cur == '.') || (cur == '_') ||
8792 (cur == '-')) {
8793 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008794 xmlChar *tmp;
8795
Owen Taylor3473f882001-02-23 17:55:21 +00008796 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008797 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8798 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008799 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008800 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008801 return(NULL);
8802 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008803 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008804 }
8805 buf[len++] = cur;
8806 NEXT;
8807 cur = CUR;
8808 if (cur == 0) {
8809 SHRINK;
8810 GROW;
8811 cur = CUR;
8812 }
8813 }
8814 buf[len] = 0;
8815 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008816 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008817 }
8818 return(buf);
8819}
8820
8821/**
8822 * xmlParseEncodingDecl:
8823 * @ctxt: an XML parser context
8824 *
8825 * parse the XML encoding declaration
8826 *
8827 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8828 *
8829 * this setups the conversion filters.
8830 *
8831 * Returns the encoding value or NULL
8832 */
8833
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008834const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008835xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8836 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008837
8838 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008839 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008840 SKIP(8);
8841 SKIP_BLANKS;
8842 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008843 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008844 return(NULL);
8845 }
8846 NEXT;
8847 SKIP_BLANKS;
8848 if (RAW == '"') {
8849 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008850 encoding = xmlParseEncName(ctxt);
8851 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008852 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008853 } else
8854 NEXT;
8855 } else if (RAW == '\''){
8856 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008857 encoding = xmlParseEncName(ctxt);
8858 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008859 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008860 } else
8861 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008862 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008863 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008864 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008865 /*
8866 * UTF-16 encoding stwich has already taken place at this stage,
8867 * more over the little-endian/big-endian selection is already done
8868 */
8869 if ((encoding != NULL) &&
8870 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8871 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008872 if (ctxt->encoding != NULL)
8873 xmlFree((xmlChar *) ctxt->encoding);
8874 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008875 }
8876 /*
8877 * UTF-8 encoding is handled natively
8878 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008879 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008880 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8881 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008882 if (ctxt->encoding != NULL)
8883 xmlFree((xmlChar *) ctxt->encoding);
8884 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008885 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008886 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008887 xmlCharEncodingHandlerPtr handler;
8888
8889 if (ctxt->input->encoding != NULL)
8890 xmlFree((xmlChar *) ctxt->input->encoding);
8891 ctxt->input->encoding = encoding;
8892
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008893 handler = xmlFindCharEncodingHandler((const char *) encoding);
8894 if (handler != NULL) {
8895 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008896 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008897 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008898 "Unsupported encoding %s\n", encoding);
8899 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008900 }
8901 }
8902 }
8903 return(encoding);
8904}
8905
8906/**
8907 * xmlParseSDDecl:
8908 * @ctxt: an XML parser context
8909 *
8910 * parse the XML standalone declaration
8911 *
8912 * [32] SDDecl ::= S 'standalone' Eq
8913 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8914 *
8915 * [ VC: Standalone Document Declaration ]
8916 * TODO The standalone document declaration must have the value "no"
8917 * if any external markup declarations contain declarations of:
8918 * - attributes with default values, if elements to which these
8919 * attributes apply appear in the document without specifications
8920 * of values for these attributes, or
8921 * - entities (other than amp, lt, gt, apos, quot), if references
8922 * to those entities appear in the document, or
8923 * - attributes with values subject to normalization, where the
8924 * attribute appears in the document with a value which will change
8925 * as a result of normalization, or
8926 * - element types with element content, if white space occurs directly
8927 * within any instance of those types.
8928 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008929 * Returns:
8930 * 1 if standalone="yes"
8931 * 0 if standalone="no"
8932 * -2 if standalone attribute is missing or invalid
8933 * (A standalone value of -2 means that the XML declaration was found,
8934 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00008935 */
8936
8937int
8938xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008939 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00008940
8941 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008942 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008943 SKIP(10);
8944 SKIP_BLANKS;
8945 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008946 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008947 return(standalone);
8948 }
8949 NEXT;
8950 SKIP_BLANKS;
8951 if (RAW == '\''){
8952 NEXT;
8953 if ((RAW == 'n') && (NXT(1) == 'o')) {
8954 standalone = 0;
8955 SKIP(2);
8956 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8957 (NXT(2) == 's')) {
8958 standalone = 1;
8959 SKIP(3);
8960 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008961 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008962 }
8963 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008964 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008965 } else
8966 NEXT;
8967 } else if (RAW == '"'){
8968 NEXT;
8969 if ((RAW == 'n') && (NXT(1) == 'o')) {
8970 standalone = 0;
8971 SKIP(2);
8972 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8973 (NXT(2) == 's')) {
8974 standalone = 1;
8975 SKIP(3);
8976 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008977 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008978 }
8979 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008980 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008981 } else
8982 NEXT;
8983 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008984 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008985 }
8986 }
8987 return(standalone);
8988}
8989
8990/**
8991 * xmlParseXMLDecl:
8992 * @ctxt: an XML parser context
8993 *
8994 * parse an XML declaration header
8995 *
8996 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8997 */
8998
8999void
9000xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9001 xmlChar *version;
9002
9003 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009004 * This value for standalone indicates that the document has an
9005 * XML declaration but it does not have a standalone attribute.
9006 * It will be overwritten later if a standalone attribute is found.
9007 */
9008 ctxt->input->standalone = -2;
9009
9010 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009011 * We know that '<?xml' is here.
9012 */
9013 SKIP(5);
9014
William M. Brack76e95df2003-10-18 16:20:14 +00009015 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009016 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9017 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009018 }
9019 SKIP_BLANKS;
9020
9021 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009022 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009023 */
9024 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009025 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009026 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009027 } else {
9028 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9029 /*
9030 * TODO: Blueberry should be detected here
9031 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009032 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9033 "Unsupported version '%s'\n",
9034 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009035 }
9036 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009037 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009038 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009039 }
Owen Taylor3473f882001-02-23 17:55:21 +00009040
9041 /*
9042 * We may have the encoding declaration
9043 */
William M. Brack76e95df2003-10-18 16:20:14 +00009044 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009045 if ((RAW == '?') && (NXT(1) == '>')) {
9046 SKIP(2);
9047 return;
9048 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009049 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009050 }
9051 xmlParseEncodingDecl(ctxt);
9052 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9053 /*
9054 * The XML REC instructs us to stop parsing right here
9055 */
9056 return;
9057 }
9058
9059 /*
9060 * We may have the standalone status.
9061 */
William M. Brack76e95df2003-10-18 16:20:14 +00009062 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009063 if ((RAW == '?') && (NXT(1) == '>')) {
9064 SKIP(2);
9065 return;
9066 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009067 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009068 }
9069 SKIP_BLANKS;
9070 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9071
9072 SKIP_BLANKS;
9073 if ((RAW == '?') && (NXT(1) == '>')) {
9074 SKIP(2);
9075 } else if (RAW == '>') {
9076 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009077 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009078 NEXT;
9079 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009080 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009081 MOVETO_ENDTAG(CUR_PTR);
9082 NEXT;
9083 }
9084}
9085
9086/**
9087 * xmlParseMisc:
9088 * @ctxt: an XML parser context
9089 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009090 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009091 *
9092 * [27] Misc ::= Comment | PI | S
9093 */
9094
9095void
9096xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009097 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009098 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009099 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009100 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009101 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009102 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009103 NEXT;
9104 } else
9105 xmlParseComment(ctxt);
9106 }
9107}
9108
9109/**
9110 * xmlParseDocument:
9111 * @ctxt: an XML parser context
9112 *
9113 * parse an XML document (and build a tree if using the standard SAX
9114 * interface).
9115 *
9116 * [1] document ::= prolog element Misc*
9117 *
9118 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9119 *
9120 * Returns 0, -1 in case of error. the parser context is augmented
9121 * as a result of the parsing.
9122 */
9123
9124int
9125xmlParseDocument(xmlParserCtxtPtr ctxt) {
9126 xmlChar start[4];
9127 xmlCharEncoding enc;
9128
9129 xmlInitParser();
9130
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009131 if ((ctxt == NULL) || (ctxt->input == NULL))
9132 return(-1);
9133
Owen Taylor3473f882001-02-23 17:55:21 +00009134 GROW;
9135
9136 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009137 * SAX: detecting the level.
9138 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009139 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009140
9141 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009142 * SAX: beginning of the document processing.
9143 */
9144 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9145 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9146
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009147 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9148 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009149 /*
9150 * Get the 4 first bytes and decode the charset
9151 * if enc != XML_CHAR_ENCODING_NONE
9152 * plug some encoding conversion routines.
9153 */
9154 start[0] = RAW;
9155 start[1] = NXT(1);
9156 start[2] = NXT(2);
9157 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009158 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009159 if (enc != XML_CHAR_ENCODING_NONE) {
9160 xmlSwitchEncoding(ctxt, enc);
9161 }
Owen Taylor3473f882001-02-23 17:55:21 +00009162 }
9163
9164
9165 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009166 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009167 }
9168
9169 /*
9170 * Check for the XMLDecl in the Prolog.
9171 */
9172 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009173 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009174
9175 /*
9176 * Note that we will switch encoding on the fly.
9177 */
9178 xmlParseXMLDecl(ctxt);
9179 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9180 /*
9181 * The XML REC instructs us to stop parsing right here
9182 */
9183 return(-1);
9184 }
9185 ctxt->standalone = ctxt->input->standalone;
9186 SKIP_BLANKS;
9187 } else {
9188 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9189 }
9190 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9191 ctxt->sax->startDocument(ctxt->userData);
9192
9193 /*
9194 * The Misc part of the Prolog
9195 */
9196 GROW;
9197 xmlParseMisc(ctxt);
9198
9199 /*
9200 * Then possibly doc type declaration(s) and more Misc
9201 * (doctypedecl Misc*)?
9202 */
9203 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009204 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009205
9206 ctxt->inSubset = 1;
9207 xmlParseDocTypeDecl(ctxt);
9208 if (RAW == '[') {
9209 ctxt->instate = XML_PARSER_DTD;
9210 xmlParseInternalSubset(ctxt);
9211 }
9212
9213 /*
9214 * Create and update the external subset.
9215 */
9216 ctxt->inSubset = 2;
9217 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9218 (!ctxt->disableSAX))
9219 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9220 ctxt->extSubSystem, ctxt->extSubURI);
9221 ctxt->inSubset = 0;
9222
9223
9224 ctxt->instate = XML_PARSER_PROLOG;
9225 xmlParseMisc(ctxt);
9226 }
9227
9228 /*
9229 * Time to start parsing the tree itself
9230 */
9231 GROW;
9232 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009233 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9234 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009235 } else {
9236 ctxt->instate = XML_PARSER_CONTENT;
9237 xmlParseElement(ctxt);
9238 ctxt->instate = XML_PARSER_EPILOG;
9239
9240
9241 /*
9242 * The Misc part at the end
9243 */
9244 xmlParseMisc(ctxt);
9245
Daniel Veillard561b7f82002-03-20 21:55:57 +00009246 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009247 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009248 }
9249 ctxt->instate = XML_PARSER_EOF;
9250 }
9251
9252 /*
9253 * SAX: end of the document processing.
9254 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009255 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009256 ctxt->sax->endDocument(ctxt->userData);
9257
Daniel Veillard5997aca2002-03-18 18:36:20 +00009258 /*
9259 * Remove locally kept entity definitions if the tree was not built
9260 */
9261 if ((ctxt->myDoc != NULL) &&
9262 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9263 xmlFreeDoc(ctxt->myDoc);
9264 ctxt->myDoc = NULL;
9265 }
9266
Daniel Veillardc7612992002-02-17 22:47:37 +00009267 if (! ctxt->wellFormed) {
9268 ctxt->valid = 0;
9269 return(-1);
9270 }
Owen Taylor3473f882001-02-23 17:55:21 +00009271 return(0);
9272}
9273
9274/**
9275 * xmlParseExtParsedEnt:
9276 * @ctxt: an XML parser context
9277 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009278 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009279 * An external general parsed entity is well-formed if it matches the
9280 * production labeled extParsedEnt.
9281 *
9282 * [78] extParsedEnt ::= TextDecl? content
9283 *
9284 * Returns 0, -1 in case of error. the parser context is augmented
9285 * as a result of the parsing.
9286 */
9287
9288int
9289xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9290 xmlChar start[4];
9291 xmlCharEncoding enc;
9292
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009293 if ((ctxt == NULL) || (ctxt->input == NULL))
9294 return(-1);
9295
Owen Taylor3473f882001-02-23 17:55:21 +00009296 xmlDefaultSAXHandlerInit();
9297
Daniel Veillard309f81d2003-09-23 09:02:53 +00009298 xmlDetectSAX2(ctxt);
9299
Owen Taylor3473f882001-02-23 17:55:21 +00009300 GROW;
9301
9302 /*
9303 * SAX: beginning of the document processing.
9304 */
9305 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9306 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9307
9308 /*
9309 * Get the 4 first bytes and decode the charset
9310 * if enc != XML_CHAR_ENCODING_NONE
9311 * plug some encoding conversion routines.
9312 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009313 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9314 start[0] = RAW;
9315 start[1] = NXT(1);
9316 start[2] = NXT(2);
9317 start[3] = NXT(3);
9318 enc = xmlDetectCharEncoding(start, 4);
9319 if (enc != XML_CHAR_ENCODING_NONE) {
9320 xmlSwitchEncoding(ctxt, enc);
9321 }
Owen Taylor3473f882001-02-23 17:55:21 +00009322 }
9323
9324
9325 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009326 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009327 }
9328
9329 /*
9330 * Check for the XMLDecl in the Prolog.
9331 */
9332 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009333 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009334
9335 /*
9336 * Note that we will switch encoding on the fly.
9337 */
9338 xmlParseXMLDecl(ctxt);
9339 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9340 /*
9341 * The XML REC instructs us to stop parsing right here
9342 */
9343 return(-1);
9344 }
9345 SKIP_BLANKS;
9346 } else {
9347 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9348 }
9349 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9350 ctxt->sax->startDocument(ctxt->userData);
9351
9352 /*
9353 * Doing validity checking on chunk doesn't make sense
9354 */
9355 ctxt->instate = XML_PARSER_CONTENT;
9356 ctxt->validate = 0;
9357 ctxt->loadsubset = 0;
9358 ctxt->depth = 0;
9359
9360 xmlParseContent(ctxt);
9361
9362 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009363 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009364 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009365 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009366 }
9367
9368 /*
9369 * SAX: end of the document processing.
9370 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009371 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009372 ctxt->sax->endDocument(ctxt->userData);
9373
9374 if (! ctxt->wellFormed) return(-1);
9375 return(0);
9376}
9377
Daniel Veillard73b013f2003-09-30 12:36:01 +00009378#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009379/************************************************************************
9380 * *
9381 * Progressive parsing interfaces *
9382 * *
9383 ************************************************************************/
9384
9385/**
9386 * xmlParseLookupSequence:
9387 * @ctxt: an XML parser context
9388 * @first: the first char to lookup
9389 * @next: the next char to lookup or zero
9390 * @third: the next char to lookup or zero
9391 *
9392 * Try to find if a sequence (first, next, third) or just (first next) or
9393 * (first) is available in the input stream.
9394 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9395 * to avoid rescanning sequences of bytes, it DOES change the state of the
9396 * parser, do not use liberally.
9397 *
9398 * Returns the index to the current parsing point if the full sequence
9399 * is available, -1 otherwise.
9400 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009401static int
Owen Taylor3473f882001-02-23 17:55:21 +00009402xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9403 xmlChar next, xmlChar third) {
9404 int base, len;
9405 xmlParserInputPtr in;
9406 const xmlChar *buf;
9407
9408 in = ctxt->input;
9409 if (in == NULL) return(-1);
9410 base = in->cur - in->base;
9411 if (base < 0) return(-1);
9412 if (ctxt->checkIndex > base)
9413 base = ctxt->checkIndex;
9414 if (in->buf == NULL) {
9415 buf = in->base;
9416 len = in->length;
9417 } else {
9418 buf = in->buf->buffer->content;
9419 len = in->buf->buffer->use;
9420 }
9421 /* take into account the sequence length */
9422 if (third) len -= 2;
9423 else if (next) len --;
9424 for (;base < len;base++) {
9425 if (buf[base] == first) {
9426 if (third != 0) {
9427 if ((buf[base + 1] != next) ||
9428 (buf[base + 2] != third)) continue;
9429 } else if (next != 0) {
9430 if (buf[base + 1] != next) continue;
9431 }
9432 ctxt->checkIndex = 0;
9433#ifdef DEBUG_PUSH
9434 if (next == 0)
9435 xmlGenericError(xmlGenericErrorContext,
9436 "PP: lookup '%c' found at %d\n",
9437 first, base);
9438 else if (third == 0)
9439 xmlGenericError(xmlGenericErrorContext,
9440 "PP: lookup '%c%c' found at %d\n",
9441 first, next, base);
9442 else
9443 xmlGenericError(xmlGenericErrorContext,
9444 "PP: lookup '%c%c%c' found at %d\n",
9445 first, next, third, base);
9446#endif
9447 return(base - (in->cur - in->base));
9448 }
9449 }
9450 ctxt->checkIndex = base;
9451#ifdef DEBUG_PUSH
9452 if (next == 0)
9453 xmlGenericError(xmlGenericErrorContext,
9454 "PP: lookup '%c' failed\n", first);
9455 else if (third == 0)
9456 xmlGenericError(xmlGenericErrorContext,
9457 "PP: lookup '%c%c' failed\n", first, next);
9458 else
9459 xmlGenericError(xmlGenericErrorContext,
9460 "PP: lookup '%c%c%c' failed\n", first, next, third);
9461#endif
9462 return(-1);
9463}
9464
9465/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 * xmlParseGetLasts:
9467 * @ctxt: an XML parser context
9468 * @lastlt: pointer to store the last '<' from the input
9469 * @lastgt: pointer to store the last '>' from the input
9470 *
9471 * Lookup the last < and > in the current chunk
9472 */
9473static void
9474xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9475 const xmlChar **lastgt) {
9476 const xmlChar *tmp;
9477
9478 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9479 xmlGenericError(xmlGenericErrorContext,
9480 "Internal error: xmlParseGetLasts\n");
9481 return;
9482 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009483 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009484 tmp = ctxt->input->end;
9485 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009486 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009487 if (tmp < ctxt->input->base) {
9488 *lastlt = NULL;
9489 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009490 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009491 *lastlt = tmp;
9492 tmp++;
9493 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9494 if (*tmp == '\'') {
9495 tmp++;
9496 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9497 if (tmp < ctxt->input->end) tmp++;
9498 } else if (*tmp == '"') {
9499 tmp++;
9500 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9501 if (tmp < ctxt->input->end) tmp++;
9502 } else
9503 tmp++;
9504 }
9505 if (tmp < ctxt->input->end)
9506 *lastgt = tmp;
9507 else {
9508 tmp = *lastlt;
9509 tmp--;
9510 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9511 if (tmp >= ctxt->input->base)
9512 *lastgt = tmp;
9513 else
9514 *lastgt = NULL;
9515 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009516 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009517 } else {
9518 *lastlt = NULL;
9519 *lastgt = NULL;
9520 }
9521}
9522/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009523 * xmlCheckCdataPush:
9524 * @cur: pointer to the bock of characters
9525 * @len: length of the block in bytes
9526 *
9527 * Check that the block of characters is okay as SCdata content [20]
9528 *
9529 * Returns the number of bytes to pass if okay, a negative index where an
9530 * UTF-8 error occured otherwise
9531 */
9532static int
9533xmlCheckCdataPush(const xmlChar *utf, int len) {
9534 int ix;
9535 unsigned char c;
9536 int codepoint;
9537
9538 if ((utf == NULL) || (len <= 0))
9539 return(0);
9540
9541 for (ix = 0; ix < len;) { /* string is 0-terminated */
9542 c = utf[ix];
9543 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9544 if (c >= 0x20)
9545 ix++;
9546 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9547 ix++;
9548 else
9549 return(-ix);
9550 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9551 if (ix + 2 > len) return(ix);
9552 if ((utf[ix+1] & 0xc0 ) != 0x80)
9553 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009554 codepoint = (utf[ix] & 0x1f) << 6;
9555 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009556 if (!xmlIsCharQ(codepoint))
9557 return(-ix);
9558 ix += 2;
9559 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9560 if (ix + 3 > len) return(ix);
9561 if (((utf[ix+1] & 0xc0) != 0x80) ||
9562 ((utf[ix+2] & 0xc0) != 0x80))
9563 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009564 codepoint = (utf[ix] & 0xf) << 12;
9565 codepoint |= (utf[ix+1] & 0x3f) << 6;
9566 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009567 if (!xmlIsCharQ(codepoint))
9568 return(-ix);
9569 ix += 3;
9570 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9571 if (ix + 4 > len) return(ix);
9572 if (((utf[ix+1] & 0xc0) != 0x80) ||
9573 ((utf[ix+2] & 0xc0) != 0x80) ||
9574 ((utf[ix+3] & 0xc0) != 0x80))
9575 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009576 codepoint = (utf[ix] & 0x7) << 18;
9577 codepoint |= (utf[ix+1] & 0x3f) << 12;
9578 codepoint |= (utf[ix+2] & 0x3f) << 6;
9579 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009580 if (!xmlIsCharQ(codepoint))
9581 return(-ix);
9582 ix += 4;
9583 } else /* unknown encoding */
9584 return(-ix);
9585 }
9586 return(ix);
9587}
9588
9589/**
Owen Taylor3473f882001-02-23 17:55:21 +00009590 * xmlParseTryOrFinish:
9591 * @ctxt: an XML parser context
9592 * @terminate: last chunk indicator
9593 *
9594 * Try to progress on parsing
9595 *
9596 * Returns zero if no parsing was possible
9597 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009598static int
Owen Taylor3473f882001-02-23 17:55:21 +00009599xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9600 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009601 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009602 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009603 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009604
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009605 if (ctxt->input == NULL)
9606 return(0);
9607
Owen Taylor3473f882001-02-23 17:55:21 +00009608#ifdef DEBUG_PUSH
9609 switch (ctxt->instate) {
9610 case XML_PARSER_EOF:
9611 xmlGenericError(xmlGenericErrorContext,
9612 "PP: try EOF\n"); break;
9613 case XML_PARSER_START:
9614 xmlGenericError(xmlGenericErrorContext,
9615 "PP: try START\n"); break;
9616 case XML_PARSER_MISC:
9617 xmlGenericError(xmlGenericErrorContext,
9618 "PP: try MISC\n");break;
9619 case XML_PARSER_COMMENT:
9620 xmlGenericError(xmlGenericErrorContext,
9621 "PP: try COMMENT\n");break;
9622 case XML_PARSER_PROLOG:
9623 xmlGenericError(xmlGenericErrorContext,
9624 "PP: try PROLOG\n");break;
9625 case XML_PARSER_START_TAG:
9626 xmlGenericError(xmlGenericErrorContext,
9627 "PP: try START_TAG\n");break;
9628 case XML_PARSER_CONTENT:
9629 xmlGenericError(xmlGenericErrorContext,
9630 "PP: try CONTENT\n");break;
9631 case XML_PARSER_CDATA_SECTION:
9632 xmlGenericError(xmlGenericErrorContext,
9633 "PP: try CDATA_SECTION\n");break;
9634 case XML_PARSER_END_TAG:
9635 xmlGenericError(xmlGenericErrorContext,
9636 "PP: try END_TAG\n");break;
9637 case XML_PARSER_ENTITY_DECL:
9638 xmlGenericError(xmlGenericErrorContext,
9639 "PP: try ENTITY_DECL\n");break;
9640 case XML_PARSER_ENTITY_VALUE:
9641 xmlGenericError(xmlGenericErrorContext,
9642 "PP: try ENTITY_VALUE\n");break;
9643 case XML_PARSER_ATTRIBUTE_VALUE:
9644 xmlGenericError(xmlGenericErrorContext,
9645 "PP: try ATTRIBUTE_VALUE\n");break;
9646 case XML_PARSER_DTD:
9647 xmlGenericError(xmlGenericErrorContext,
9648 "PP: try DTD\n");break;
9649 case XML_PARSER_EPILOG:
9650 xmlGenericError(xmlGenericErrorContext,
9651 "PP: try EPILOG\n");break;
9652 case XML_PARSER_PI:
9653 xmlGenericError(xmlGenericErrorContext,
9654 "PP: try PI\n");break;
9655 case XML_PARSER_IGNORE:
9656 xmlGenericError(xmlGenericErrorContext,
9657 "PP: try IGNORE\n");break;
9658 }
9659#endif
9660
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009661 if ((ctxt->input != NULL) &&
9662 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009663 xmlSHRINK(ctxt);
9664 ctxt->checkIndex = 0;
9665 }
9666 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009667
Daniel Veillarda880b122003-04-21 21:36:41 +00009668 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009669 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009670 return(0);
9671
9672
Owen Taylor3473f882001-02-23 17:55:21 +00009673 /*
9674 * Pop-up of finished entities.
9675 */
9676 while ((RAW == 0) && (ctxt->inputNr > 1))
9677 xmlPopInput(ctxt);
9678
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009679 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009680 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009681 avail = ctxt->input->length -
9682 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009683 else {
9684 /*
9685 * If we are operating on converted input, try to flush
9686 * remainng chars to avoid them stalling in the non-converted
9687 * buffer.
9688 */
9689 if ((ctxt->input->buf->raw != NULL) &&
9690 (ctxt->input->buf->raw->use > 0)) {
9691 int base = ctxt->input->base -
9692 ctxt->input->buf->buffer->content;
9693 int current = ctxt->input->cur - ctxt->input->base;
9694
9695 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9696 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9697 ctxt->input->cur = ctxt->input->base + current;
9698 ctxt->input->end =
9699 &ctxt->input->buf->buffer->content[
9700 ctxt->input->buf->buffer->use];
9701 }
9702 avail = ctxt->input->buf->buffer->use -
9703 (ctxt->input->cur - ctxt->input->base);
9704 }
Owen Taylor3473f882001-02-23 17:55:21 +00009705 if (avail < 1)
9706 goto done;
9707 switch (ctxt->instate) {
9708 case XML_PARSER_EOF:
9709 /*
9710 * Document parsing is done !
9711 */
9712 goto done;
9713 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009714 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9715 xmlChar start[4];
9716 xmlCharEncoding enc;
9717
9718 /*
9719 * Very first chars read from the document flow.
9720 */
9721 if (avail < 4)
9722 goto done;
9723
9724 /*
9725 * Get the 4 first bytes and decode the charset
9726 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009727 * plug some encoding conversion routines,
9728 * else xmlSwitchEncoding will set to (default)
9729 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009730 */
9731 start[0] = RAW;
9732 start[1] = NXT(1);
9733 start[2] = NXT(2);
9734 start[3] = NXT(3);
9735 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009736 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009737 break;
9738 }
Owen Taylor3473f882001-02-23 17:55:21 +00009739
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009740 if (avail < 2)
9741 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009742 cur = ctxt->input->cur[0];
9743 next = ctxt->input->cur[1];
9744 if (cur == 0) {
9745 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9746 ctxt->sax->setDocumentLocator(ctxt->userData,
9747 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009748 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009749 ctxt->instate = XML_PARSER_EOF;
9750#ifdef DEBUG_PUSH
9751 xmlGenericError(xmlGenericErrorContext,
9752 "PP: entering EOF\n");
9753#endif
9754 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9755 ctxt->sax->endDocument(ctxt->userData);
9756 goto done;
9757 }
9758 if ((cur == '<') && (next == '?')) {
9759 /* PI or XML decl */
9760 if (avail < 5) return(ret);
9761 if ((!terminate) &&
9762 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9763 return(ret);
9764 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9765 ctxt->sax->setDocumentLocator(ctxt->userData,
9766 &xmlDefaultSAXLocator);
9767 if ((ctxt->input->cur[2] == 'x') &&
9768 (ctxt->input->cur[3] == 'm') &&
9769 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009770 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009771 ret += 5;
9772#ifdef DEBUG_PUSH
9773 xmlGenericError(xmlGenericErrorContext,
9774 "PP: Parsing XML Decl\n");
9775#endif
9776 xmlParseXMLDecl(ctxt);
9777 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9778 /*
9779 * The XML REC instructs us to stop parsing right
9780 * here
9781 */
9782 ctxt->instate = XML_PARSER_EOF;
9783 return(0);
9784 }
9785 ctxt->standalone = ctxt->input->standalone;
9786 if ((ctxt->encoding == NULL) &&
9787 (ctxt->input->encoding != NULL))
9788 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9789 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9790 (!ctxt->disableSAX))
9791 ctxt->sax->startDocument(ctxt->userData);
9792 ctxt->instate = XML_PARSER_MISC;
9793#ifdef DEBUG_PUSH
9794 xmlGenericError(xmlGenericErrorContext,
9795 "PP: entering MISC\n");
9796#endif
9797 } else {
9798 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9799 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9800 (!ctxt->disableSAX))
9801 ctxt->sax->startDocument(ctxt->userData);
9802 ctxt->instate = XML_PARSER_MISC;
9803#ifdef DEBUG_PUSH
9804 xmlGenericError(xmlGenericErrorContext,
9805 "PP: entering MISC\n");
9806#endif
9807 }
9808 } else {
9809 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9810 ctxt->sax->setDocumentLocator(ctxt->userData,
9811 &xmlDefaultSAXLocator);
9812 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009813 if (ctxt->version == NULL) {
9814 xmlErrMemory(ctxt, NULL);
9815 break;
9816 }
Owen Taylor3473f882001-02-23 17:55:21 +00009817 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9818 (!ctxt->disableSAX))
9819 ctxt->sax->startDocument(ctxt->userData);
9820 ctxt->instate = XML_PARSER_MISC;
9821#ifdef DEBUG_PUSH
9822 xmlGenericError(xmlGenericErrorContext,
9823 "PP: entering MISC\n");
9824#endif
9825 }
9826 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009827 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009828 const xmlChar *name;
9829 const xmlChar *prefix;
9830 const xmlChar *URI;
9831 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009832
9833 if ((avail < 2) && (ctxt->inputNr == 1))
9834 goto done;
9835 cur = ctxt->input->cur[0];
9836 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009837 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009838 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009839 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9840 ctxt->sax->endDocument(ctxt->userData);
9841 goto done;
9842 }
9843 if (!terminate) {
9844 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009845 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009846 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009847 goto done;
9848 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9849 goto done;
9850 }
9851 }
9852 if (ctxt->spaceNr == 0)
9853 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009854 else if (*ctxt->space == -2)
9855 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009856 else
9857 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009858#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009859 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009860#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009861 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009862#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009863 else
9864 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009865#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009866 if (name == NULL) {
9867 spacePop(ctxt);
9868 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009869 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9870 ctxt->sax->endDocument(ctxt->userData);
9871 goto done;
9872 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009873#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009874 /*
9875 * [ VC: Root Element Type ]
9876 * The Name in the document type declaration must match
9877 * the element type of the root element.
9878 */
9879 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9880 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9881 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009882#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009883
9884 /*
9885 * Check for an Empty Element.
9886 */
9887 if ((RAW == '/') && (NXT(1) == '>')) {
9888 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009889
9890 if (ctxt->sax2) {
9891 if ((ctxt->sax != NULL) &&
9892 (ctxt->sax->endElementNs != NULL) &&
9893 (!ctxt->disableSAX))
9894 ctxt->sax->endElementNs(ctxt->userData, name,
9895 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009896 if (ctxt->nsNr - nsNr > 0)
9897 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009898#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009899 } else {
9900 if ((ctxt->sax != NULL) &&
9901 (ctxt->sax->endElement != NULL) &&
9902 (!ctxt->disableSAX))
9903 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009904#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009905 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009906 spacePop(ctxt);
9907 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009908 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009909 } else {
9910 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009911 }
9912 break;
9913 }
9914 if (RAW == '>') {
9915 NEXT;
9916 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009917 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009918 "Couldn't find end of Start Tag %s\n",
9919 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009920 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009921 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009922 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009923 if (ctxt->sax2)
9924 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009925#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009926 else
9927 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009928#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009929
Daniel Veillarda880b122003-04-21 21:36:41 +00009930 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009931 break;
9932 }
9933 case XML_PARSER_CONTENT: {
9934 const xmlChar *test;
9935 unsigned int cons;
9936 if ((avail < 2) && (ctxt->inputNr == 1))
9937 goto done;
9938 cur = ctxt->input->cur[0];
9939 next = ctxt->input->cur[1];
9940
9941 test = CUR_PTR;
9942 cons = ctxt->input->consumed;
9943 if ((cur == '<') && (next == '/')) {
9944 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009945 break;
9946 } else if ((cur == '<') && (next == '?')) {
9947 if ((!terminate) &&
9948 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9949 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009950 xmlParsePI(ctxt);
9951 } else if ((cur == '<') && (next != '!')) {
9952 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009953 break;
9954 } else if ((cur == '<') && (next == '!') &&
9955 (ctxt->input->cur[2] == '-') &&
9956 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009957 int term;
9958
9959 if (avail < 4)
9960 goto done;
9961 ctxt->input->cur += 4;
9962 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9963 ctxt->input->cur -= 4;
9964 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009965 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009966 xmlParseComment(ctxt);
9967 ctxt->instate = XML_PARSER_CONTENT;
9968 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9969 (ctxt->input->cur[2] == '[') &&
9970 (ctxt->input->cur[3] == 'C') &&
9971 (ctxt->input->cur[4] == 'D') &&
9972 (ctxt->input->cur[5] == 'A') &&
9973 (ctxt->input->cur[6] == 'T') &&
9974 (ctxt->input->cur[7] == 'A') &&
9975 (ctxt->input->cur[8] == '[')) {
9976 SKIP(9);
9977 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009978 break;
9979 } else if ((cur == '<') && (next == '!') &&
9980 (avail < 9)) {
9981 goto done;
9982 } else if (cur == '&') {
9983 if ((!terminate) &&
9984 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9985 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009986 xmlParseReference(ctxt);
9987 } else {
9988 /* TODO Avoid the extra copy, handle directly !!! */
9989 /*
9990 * Goal of the following test is:
9991 * - minimize calls to the SAX 'character' callback
9992 * when they are mergeable
9993 * - handle an problem for isBlank when we only parse
9994 * a sequence of blank chars and the next one is
9995 * not available to check against '<' presence.
9996 * - tries to homogenize the differences in SAX
9997 * callbacks between the push and pull versions
9998 * of the parser.
9999 */
10000 if ((ctxt->inputNr == 1) &&
10001 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10002 if (!terminate) {
10003 if (ctxt->progressive) {
10004 if ((lastlt == NULL) ||
10005 (ctxt->input->cur > lastlt))
10006 goto done;
10007 } else if (xmlParseLookupSequence(ctxt,
10008 '<', 0, 0) < 0) {
10009 goto done;
10010 }
10011 }
10012 }
10013 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010014 xmlParseCharData(ctxt, 0);
10015 }
10016 /*
10017 * Pop-up of finished entities.
10018 */
10019 while ((RAW == 0) && (ctxt->inputNr > 1))
10020 xmlPopInput(ctxt);
10021 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010022 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10023 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010024 ctxt->instate = XML_PARSER_EOF;
10025 break;
10026 }
10027 break;
10028 }
10029 case XML_PARSER_END_TAG:
10030 if (avail < 2)
10031 goto done;
10032 if (!terminate) {
10033 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010034 /* > can be found unescaped in attribute values */
10035 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010036 goto done;
10037 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10038 goto done;
10039 }
10040 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010041 if (ctxt->sax2) {
10042 xmlParseEndTag2(ctxt,
10043 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10044 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010045 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010046 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010047 }
10048#ifdef LIBXML_SAX1_ENABLED
10049 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010050 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010051#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010052 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010053 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010054 } else {
10055 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010056 }
10057 break;
10058 case XML_PARSER_CDATA_SECTION: {
10059 /*
10060 * The Push mode need to have the SAX callback for
10061 * cdataBlock merge back contiguous callbacks.
10062 */
10063 int base;
10064
10065 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10066 if (base < 0) {
10067 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010068 int tmp;
10069
10070 tmp = xmlCheckCdataPush(ctxt->input->cur,
10071 XML_PARSER_BIG_BUFFER_SIZE);
10072 if (tmp < 0) {
10073 tmp = -tmp;
10074 ctxt->input->cur += tmp;
10075 goto encoding_error;
10076 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010077 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10078 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010079 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010080 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010081 else if (ctxt->sax->characters != NULL)
10082 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010083 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010084 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010085 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010086 ctxt->checkIndex = 0;
10087 }
10088 goto done;
10089 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010090 int tmp;
10091
10092 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10093 if ((tmp < 0) || (tmp != base)) {
10094 tmp = -tmp;
10095 ctxt->input->cur += tmp;
10096 goto encoding_error;
10097 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010098 if ((ctxt->sax != NULL) && (base > 0) &&
10099 (!ctxt->disableSAX)) {
10100 if (ctxt->sax->cdataBlock != NULL)
10101 ctxt->sax->cdataBlock(ctxt->userData,
10102 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010103 else if (ctxt->sax->characters != NULL)
10104 ctxt->sax->characters(ctxt->userData,
10105 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010106 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010107 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010108 ctxt->checkIndex = 0;
10109 ctxt->instate = XML_PARSER_CONTENT;
10110#ifdef DEBUG_PUSH
10111 xmlGenericError(xmlGenericErrorContext,
10112 "PP: entering CONTENT\n");
10113#endif
10114 }
10115 break;
10116 }
Owen Taylor3473f882001-02-23 17:55:21 +000010117 case XML_PARSER_MISC:
10118 SKIP_BLANKS;
10119 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010120 avail = ctxt->input->length -
10121 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010122 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010123 avail = ctxt->input->buf->buffer->use -
10124 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010125 if (avail < 2)
10126 goto done;
10127 cur = ctxt->input->cur[0];
10128 next = ctxt->input->cur[1];
10129 if ((cur == '<') && (next == '?')) {
10130 if ((!terminate) &&
10131 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10132 goto done;
10133#ifdef DEBUG_PUSH
10134 xmlGenericError(xmlGenericErrorContext,
10135 "PP: Parsing PI\n");
10136#endif
10137 xmlParsePI(ctxt);
10138 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010139 (ctxt->input->cur[2] == '-') &&
10140 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010141 if ((!terminate) &&
10142 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10143 goto done;
10144#ifdef DEBUG_PUSH
10145 xmlGenericError(xmlGenericErrorContext,
10146 "PP: Parsing Comment\n");
10147#endif
10148 xmlParseComment(ctxt);
10149 ctxt->instate = XML_PARSER_MISC;
10150 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010151 (ctxt->input->cur[2] == 'D') &&
10152 (ctxt->input->cur[3] == 'O') &&
10153 (ctxt->input->cur[4] == 'C') &&
10154 (ctxt->input->cur[5] == 'T') &&
10155 (ctxt->input->cur[6] == 'Y') &&
10156 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010157 (ctxt->input->cur[8] == 'E')) {
10158 if ((!terminate) &&
10159 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10160 goto done;
10161#ifdef DEBUG_PUSH
10162 xmlGenericError(xmlGenericErrorContext,
10163 "PP: Parsing internal subset\n");
10164#endif
10165 ctxt->inSubset = 1;
10166 xmlParseDocTypeDecl(ctxt);
10167 if (RAW == '[') {
10168 ctxt->instate = XML_PARSER_DTD;
10169#ifdef DEBUG_PUSH
10170 xmlGenericError(xmlGenericErrorContext,
10171 "PP: entering DTD\n");
10172#endif
10173 } else {
10174 /*
10175 * Create and update the external subset.
10176 */
10177 ctxt->inSubset = 2;
10178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10179 (ctxt->sax->externalSubset != NULL))
10180 ctxt->sax->externalSubset(ctxt->userData,
10181 ctxt->intSubName, ctxt->extSubSystem,
10182 ctxt->extSubURI);
10183 ctxt->inSubset = 0;
10184 ctxt->instate = XML_PARSER_PROLOG;
10185#ifdef DEBUG_PUSH
10186 xmlGenericError(xmlGenericErrorContext,
10187 "PP: entering PROLOG\n");
10188#endif
10189 }
10190 } else if ((cur == '<') && (next == '!') &&
10191 (avail < 9)) {
10192 goto done;
10193 } else {
10194 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010195 ctxt->progressive = 1;
10196 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010197#ifdef DEBUG_PUSH
10198 xmlGenericError(xmlGenericErrorContext,
10199 "PP: entering START_TAG\n");
10200#endif
10201 }
10202 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010203 case XML_PARSER_PROLOG:
10204 SKIP_BLANKS;
10205 if (ctxt->input->buf == NULL)
10206 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10207 else
10208 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10209 if (avail < 2)
10210 goto done;
10211 cur = ctxt->input->cur[0];
10212 next = ctxt->input->cur[1];
10213 if ((cur == '<') && (next == '?')) {
10214 if ((!terminate) &&
10215 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10216 goto done;
10217#ifdef DEBUG_PUSH
10218 xmlGenericError(xmlGenericErrorContext,
10219 "PP: Parsing PI\n");
10220#endif
10221 xmlParsePI(ctxt);
10222 } else if ((cur == '<') && (next == '!') &&
10223 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10224 if ((!terminate) &&
10225 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10226 goto done;
10227#ifdef DEBUG_PUSH
10228 xmlGenericError(xmlGenericErrorContext,
10229 "PP: Parsing Comment\n");
10230#endif
10231 xmlParseComment(ctxt);
10232 ctxt->instate = XML_PARSER_PROLOG;
10233 } else if ((cur == '<') && (next == '!') &&
10234 (avail < 4)) {
10235 goto done;
10236 } else {
10237 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010238 if (ctxt->progressive == 0)
10239 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010240 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010241#ifdef DEBUG_PUSH
10242 xmlGenericError(xmlGenericErrorContext,
10243 "PP: entering START_TAG\n");
10244#endif
10245 }
10246 break;
10247 case XML_PARSER_EPILOG:
10248 SKIP_BLANKS;
10249 if (ctxt->input->buf == NULL)
10250 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10251 else
10252 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10253 if (avail < 2)
10254 goto done;
10255 cur = ctxt->input->cur[0];
10256 next = ctxt->input->cur[1];
10257 if ((cur == '<') && (next == '?')) {
10258 if ((!terminate) &&
10259 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10260 goto done;
10261#ifdef DEBUG_PUSH
10262 xmlGenericError(xmlGenericErrorContext,
10263 "PP: Parsing PI\n");
10264#endif
10265 xmlParsePI(ctxt);
10266 ctxt->instate = XML_PARSER_EPILOG;
10267 } else if ((cur == '<') && (next == '!') &&
10268 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10269 if ((!terminate) &&
10270 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10271 goto done;
10272#ifdef DEBUG_PUSH
10273 xmlGenericError(xmlGenericErrorContext,
10274 "PP: Parsing Comment\n");
10275#endif
10276 xmlParseComment(ctxt);
10277 ctxt->instate = XML_PARSER_EPILOG;
10278 } else if ((cur == '<') && (next == '!') &&
10279 (avail < 4)) {
10280 goto done;
10281 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010282 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010283 ctxt->instate = XML_PARSER_EOF;
10284#ifdef DEBUG_PUSH
10285 xmlGenericError(xmlGenericErrorContext,
10286 "PP: entering EOF\n");
10287#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010288 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010289 ctxt->sax->endDocument(ctxt->userData);
10290 goto done;
10291 }
10292 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010293 case XML_PARSER_DTD: {
10294 /*
10295 * Sorry but progressive parsing of the internal subset
10296 * is not expected to be supported. We first check that
10297 * the full content of the internal subset is available and
10298 * the parsing is launched only at that point.
10299 * Internal subset ends up with "']' S? '>'" in an unescaped
10300 * section and not in a ']]>' sequence which are conditional
10301 * sections (whoever argued to keep that crap in XML deserve
10302 * a place in hell !).
10303 */
10304 int base, i;
10305 xmlChar *buf;
10306 xmlChar quote = 0;
10307
10308 base = ctxt->input->cur - ctxt->input->base;
10309 if (base < 0) return(0);
10310 if (ctxt->checkIndex > base)
10311 base = ctxt->checkIndex;
10312 buf = ctxt->input->buf->buffer->content;
10313 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10314 base++) {
10315 if (quote != 0) {
10316 if (buf[base] == quote)
10317 quote = 0;
10318 continue;
10319 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010320 if ((quote == 0) && (buf[base] == '<')) {
10321 int found = 0;
10322 /* special handling of comments */
10323 if (((unsigned int) base + 4 <
10324 ctxt->input->buf->buffer->use) &&
10325 (buf[base + 1] == '!') &&
10326 (buf[base + 2] == '-') &&
10327 (buf[base + 3] == '-')) {
10328 for (;(unsigned int) base + 3 <
10329 ctxt->input->buf->buffer->use; base++) {
10330 if ((buf[base] == '-') &&
10331 (buf[base + 1] == '-') &&
10332 (buf[base + 2] == '>')) {
10333 found = 1;
10334 base += 2;
10335 break;
10336 }
10337 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010338 if (!found) {
10339#if 0
10340 fprintf(stderr, "unfinished comment\n");
10341#endif
10342 break; /* for */
10343 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010344 continue;
10345 }
10346 }
Owen Taylor3473f882001-02-23 17:55:21 +000010347 if (buf[base] == '"') {
10348 quote = '"';
10349 continue;
10350 }
10351 if (buf[base] == '\'') {
10352 quote = '\'';
10353 continue;
10354 }
10355 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010356#if 0
10357 fprintf(stderr, "%c%c%c%c: ", buf[base],
10358 buf[base + 1], buf[base + 2], buf[base + 3]);
10359#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010360 if ((unsigned int) base +1 >=
10361 ctxt->input->buf->buffer->use)
10362 break;
10363 if (buf[base + 1] == ']') {
10364 /* conditional crap, skip both ']' ! */
10365 base++;
10366 continue;
10367 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010368 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010369 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10370 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010371 if (buf[base + i] == '>') {
10372#if 0
10373 fprintf(stderr, "found\n");
10374#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010375 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010376 }
10377 if (!IS_BLANK_CH(buf[base + i])) {
10378#if 0
10379 fprintf(stderr, "not found\n");
10380#endif
10381 goto not_end_of_int_subset;
10382 }
Owen Taylor3473f882001-02-23 17:55:21 +000010383 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010384#if 0
10385 fprintf(stderr, "end of stream\n");
10386#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010387 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010388
Owen Taylor3473f882001-02-23 17:55:21 +000010389 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010390not_end_of_int_subset:
10391 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010392 }
10393 /*
10394 * We didn't found the end of the Internal subset
10395 */
Owen Taylor3473f882001-02-23 17:55:21 +000010396#ifdef DEBUG_PUSH
10397 if (next == 0)
10398 xmlGenericError(xmlGenericErrorContext,
10399 "PP: lookup of int subset end filed\n");
10400#endif
10401 goto done;
10402
10403found_end_int_subset:
10404 xmlParseInternalSubset(ctxt);
10405 ctxt->inSubset = 2;
10406 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10407 (ctxt->sax->externalSubset != NULL))
10408 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10409 ctxt->extSubSystem, ctxt->extSubURI);
10410 ctxt->inSubset = 0;
10411 ctxt->instate = XML_PARSER_PROLOG;
10412 ctxt->checkIndex = 0;
10413#ifdef DEBUG_PUSH
10414 xmlGenericError(xmlGenericErrorContext,
10415 "PP: entering PROLOG\n");
10416#endif
10417 break;
10418 }
10419 case XML_PARSER_COMMENT:
10420 xmlGenericError(xmlGenericErrorContext,
10421 "PP: internal error, state == COMMENT\n");
10422 ctxt->instate = XML_PARSER_CONTENT;
10423#ifdef DEBUG_PUSH
10424 xmlGenericError(xmlGenericErrorContext,
10425 "PP: entering CONTENT\n");
10426#endif
10427 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010428 case XML_PARSER_IGNORE:
10429 xmlGenericError(xmlGenericErrorContext,
10430 "PP: internal error, state == IGNORE");
10431 ctxt->instate = XML_PARSER_DTD;
10432#ifdef DEBUG_PUSH
10433 xmlGenericError(xmlGenericErrorContext,
10434 "PP: entering DTD\n");
10435#endif
10436 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010437 case XML_PARSER_PI:
10438 xmlGenericError(xmlGenericErrorContext,
10439 "PP: internal error, state == PI\n");
10440 ctxt->instate = XML_PARSER_CONTENT;
10441#ifdef DEBUG_PUSH
10442 xmlGenericError(xmlGenericErrorContext,
10443 "PP: entering CONTENT\n");
10444#endif
10445 break;
10446 case XML_PARSER_ENTITY_DECL:
10447 xmlGenericError(xmlGenericErrorContext,
10448 "PP: internal error, state == ENTITY_DECL\n");
10449 ctxt->instate = XML_PARSER_DTD;
10450#ifdef DEBUG_PUSH
10451 xmlGenericError(xmlGenericErrorContext,
10452 "PP: entering DTD\n");
10453#endif
10454 break;
10455 case XML_PARSER_ENTITY_VALUE:
10456 xmlGenericError(xmlGenericErrorContext,
10457 "PP: internal error, state == ENTITY_VALUE\n");
10458 ctxt->instate = XML_PARSER_CONTENT;
10459#ifdef DEBUG_PUSH
10460 xmlGenericError(xmlGenericErrorContext,
10461 "PP: entering DTD\n");
10462#endif
10463 break;
10464 case XML_PARSER_ATTRIBUTE_VALUE:
10465 xmlGenericError(xmlGenericErrorContext,
10466 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10467 ctxt->instate = XML_PARSER_START_TAG;
10468#ifdef DEBUG_PUSH
10469 xmlGenericError(xmlGenericErrorContext,
10470 "PP: entering START_TAG\n");
10471#endif
10472 break;
10473 case XML_PARSER_SYSTEM_LITERAL:
10474 xmlGenericError(xmlGenericErrorContext,
10475 "PP: internal error, state == SYSTEM_LITERAL\n");
10476 ctxt->instate = XML_PARSER_START_TAG;
10477#ifdef DEBUG_PUSH
10478 xmlGenericError(xmlGenericErrorContext,
10479 "PP: entering START_TAG\n");
10480#endif
10481 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010482 case XML_PARSER_PUBLIC_LITERAL:
10483 xmlGenericError(xmlGenericErrorContext,
10484 "PP: internal error, state == PUBLIC_LITERAL\n");
10485 ctxt->instate = XML_PARSER_START_TAG;
10486#ifdef DEBUG_PUSH
10487 xmlGenericError(xmlGenericErrorContext,
10488 "PP: entering START_TAG\n");
10489#endif
10490 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010491 }
10492 }
10493done:
10494#ifdef DEBUG_PUSH
10495 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10496#endif
10497 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010498encoding_error:
10499 {
10500 char buffer[150];
10501
10502 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10503 ctxt->input->cur[0], ctxt->input->cur[1],
10504 ctxt->input->cur[2], ctxt->input->cur[3]);
10505 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10506 "Input is not proper UTF-8, indicate encoding !\n%s",
10507 BAD_CAST buffer, NULL);
10508 }
10509 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010510}
10511
10512/**
Owen Taylor3473f882001-02-23 17:55:21 +000010513 * xmlParseChunk:
10514 * @ctxt: an XML parser context
10515 * @chunk: an char array
10516 * @size: the size in byte of the chunk
10517 * @terminate: last chunk indicator
10518 *
10519 * Parse a Chunk of memory
10520 *
10521 * Returns zero if no error, the xmlParserErrors otherwise.
10522 */
10523int
10524xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10525 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010526 int end_in_lf = 0;
10527
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010528 if (ctxt == NULL)
10529 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010530 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010531 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010532 if (ctxt->instate == XML_PARSER_START)
10533 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010534 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10535 (chunk[size - 1] == '\r')) {
10536 end_in_lf = 1;
10537 size--;
10538 }
Owen Taylor3473f882001-02-23 17:55:21 +000010539 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10540 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10541 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10542 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010543 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010544
William M. Bracka3215c72004-07-31 16:24:01 +000010545 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10546 if (res < 0) {
10547 ctxt->errNo = XML_PARSER_EOF;
10548 ctxt->disableSAX = 1;
10549 return (XML_PARSER_EOF);
10550 }
Owen Taylor3473f882001-02-23 17:55:21 +000010551 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10552 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010553 ctxt->input->end =
10554 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010555#ifdef DEBUG_PUSH
10556 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10557#endif
10558
Owen Taylor3473f882001-02-23 17:55:21 +000010559 } else if (ctxt->instate != XML_PARSER_EOF) {
10560 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10561 xmlParserInputBufferPtr in = ctxt->input->buf;
10562 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10563 (in->raw != NULL)) {
10564 int nbchars;
10565
10566 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10567 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010568 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010569 xmlGenericError(xmlGenericErrorContext,
10570 "xmlParseChunk: encoder error\n");
10571 return(XML_ERR_INVALID_ENCODING);
10572 }
10573 }
10574 }
10575 }
10576 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010577 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10578 (ctxt->input->buf != NULL)) {
10579 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10580 }
Daniel Veillard14412512005-01-21 23:53:26 +000010581 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010582 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010583 if (terminate) {
10584 /*
10585 * Check for termination
10586 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010587 int avail = 0;
10588
10589 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010590 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010591 avail = ctxt->input->length -
10592 (ctxt->input->cur - ctxt->input->base);
10593 else
10594 avail = ctxt->input->buf->buffer->use -
10595 (ctxt->input->cur - ctxt->input->base);
10596 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010597
Owen Taylor3473f882001-02-23 17:55:21 +000010598 if ((ctxt->instate != XML_PARSER_EOF) &&
10599 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010600 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010601 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010602 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010603 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010604 }
Owen Taylor3473f882001-02-23 17:55:21 +000010605 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010606 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010607 ctxt->sax->endDocument(ctxt->userData);
10608 }
10609 ctxt->instate = XML_PARSER_EOF;
10610 }
10611 return((xmlParserErrors) ctxt->errNo);
10612}
10613
10614/************************************************************************
10615 * *
10616 * I/O front end functions to the parser *
10617 * *
10618 ************************************************************************/
10619
10620/**
Owen Taylor3473f882001-02-23 17:55:21 +000010621 * xmlCreatePushParserCtxt:
10622 * @sax: a SAX handler
10623 * @user_data: The user data returned on SAX callbacks
10624 * @chunk: a pointer to an array of chars
10625 * @size: number of chars in the array
10626 * @filename: an optional file name or URI
10627 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010628 * Create a parser context for using the XML parser in push mode.
10629 * If @buffer and @size are non-NULL, the data is used to detect
10630 * the encoding. The remaining characters will be parsed so they
10631 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010632 * To allow content encoding detection, @size should be >= 4
10633 * The value of @filename is used for fetching external entities
10634 * and error/warning reports.
10635 *
10636 * Returns the new parser context or NULL
10637 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010638
Owen Taylor3473f882001-02-23 17:55:21 +000010639xmlParserCtxtPtr
10640xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10641 const char *chunk, int size, const char *filename) {
10642 xmlParserCtxtPtr ctxt;
10643 xmlParserInputPtr inputStream;
10644 xmlParserInputBufferPtr buf;
10645 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10646
10647 /*
10648 * plug some encoding conversion routines
10649 */
10650 if ((chunk != NULL) && (size >= 4))
10651 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10652
10653 buf = xmlAllocParserInputBuffer(enc);
10654 if (buf == NULL) return(NULL);
10655
10656 ctxt = xmlNewParserCtxt();
10657 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010658 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010659 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010660 return(NULL);
10661 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010662 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010663 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10664 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010665 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010666 xmlFreeParserInputBuffer(buf);
10667 xmlFreeParserCtxt(ctxt);
10668 return(NULL);
10669 }
Owen Taylor3473f882001-02-23 17:55:21 +000010670 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010671#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010672 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010673#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010674 xmlFree(ctxt->sax);
10675 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10676 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010677 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010678 xmlFreeParserInputBuffer(buf);
10679 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010680 return(NULL);
10681 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010682 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10683 if (sax->initialized == XML_SAX2_MAGIC)
10684 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10685 else
10686 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010687 if (user_data != NULL)
10688 ctxt->userData = user_data;
10689 }
10690 if (filename == NULL) {
10691 ctxt->directory = NULL;
10692 } else {
10693 ctxt->directory = xmlParserGetDirectory(filename);
10694 }
10695
10696 inputStream = xmlNewInputStream(ctxt);
10697 if (inputStream == NULL) {
10698 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010699 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010700 return(NULL);
10701 }
10702
10703 if (filename == NULL)
10704 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010705 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010706 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010707 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010708 if (inputStream->filename == NULL) {
10709 xmlFreeParserCtxt(ctxt);
10710 xmlFreeParserInputBuffer(buf);
10711 return(NULL);
10712 }
10713 }
Owen Taylor3473f882001-02-23 17:55:21 +000010714 inputStream->buf = buf;
10715 inputStream->base = inputStream->buf->buffer->content;
10716 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010717 inputStream->end =
10718 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010719
10720 inputPush(ctxt, inputStream);
10721
William M. Brack3a1cd212005-02-11 14:35:54 +000010722 /*
10723 * If the caller didn't provide an initial 'chunk' for determining
10724 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10725 * that it can be automatically determined later
10726 */
10727 if ((size == 0) || (chunk == NULL)) {
10728 ctxt->charset = XML_CHAR_ENCODING_NONE;
10729 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010730 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10731 int cur = ctxt->input->cur - ctxt->input->base;
10732
Owen Taylor3473f882001-02-23 17:55:21 +000010733 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010734
10735 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10736 ctxt->input->cur = ctxt->input->base + cur;
10737 ctxt->input->end =
10738 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010739#ifdef DEBUG_PUSH
10740 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10741#endif
10742 }
10743
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010744 if (enc != XML_CHAR_ENCODING_NONE) {
10745 xmlSwitchEncoding(ctxt, enc);
10746 }
10747
Owen Taylor3473f882001-02-23 17:55:21 +000010748 return(ctxt);
10749}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010750#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010751
10752/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010753 * xmlStopParser:
10754 * @ctxt: an XML parser context
10755 *
10756 * Blocks further parser processing
10757 */
10758void
10759xmlStopParser(xmlParserCtxtPtr ctxt) {
10760 if (ctxt == NULL)
10761 return;
10762 ctxt->instate = XML_PARSER_EOF;
10763 ctxt->disableSAX = 1;
10764 if (ctxt->input != NULL) {
10765 ctxt->input->cur = BAD_CAST"";
10766 ctxt->input->base = ctxt->input->cur;
10767 }
10768}
10769
10770/**
Owen Taylor3473f882001-02-23 17:55:21 +000010771 * xmlCreateIOParserCtxt:
10772 * @sax: a SAX handler
10773 * @user_data: The user data returned on SAX callbacks
10774 * @ioread: an I/O read function
10775 * @ioclose: an I/O close function
10776 * @ioctx: an I/O handler
10777 * @enc: the charset encoding if known
10778 *
10779 * Create a parser context for using the XML parser with an existing
10780 * I/O stream
10781 *
10782 * Returns the new parser context or NULL
10783 */
10784xmlParserCtxtPtr
10785xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10786 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10787 void *ioctx, xmlCharEncoding enc) {
10788 xmlParserCtxtPtr ctxt;
10789 xmlParserInputPtr inputStream;
10790 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010791
10792 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010793
10794 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10795 if (buf == NULL) return(NULL);
10796
10797 ctxt = xmlNewParserCtxt();
10798 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010799 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010800 return(NULL);
10801 }
10802 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010803#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010804 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010805#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010806 xmlFree(ctxt->sax);
10807 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10808 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010809 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010810 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010811 return(NULL);
10812 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010813 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10814 if (sax->initialized == XML_SAX2_MAGIC)
10815 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10816 else
10817 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010818 if (user_data != NULL)
10819 ctxt->userData = user_data;
10820 }
10821
10822 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10823 if (inputStream == NULL) {
10824 xmlFreeParserCtxt(ctxt);
10825 return(NULL);
10826 }
10827 inputPush(ctxt, inputStream);
10828
10829 return(ctxt);
10830}
10831
Daniel Veillard4432df22003-09-28 18:58:27 +000010832#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010833/************************************************************************
10834 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010835 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010836 * *
10837 ************************************************************************/
10838
10839/**
10840 * xmlIOParseDTD:
10841 * @sax: the SAX handler block or NULL
10842 * @input: an Input Buffer
10843 * @enc: the charset encoding if known
10844 *
10845 * Load and parse a DTD
10846 *
10847 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010848 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010849 */
10850
10851xmlDtdPtr
10852xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10853 xmlCharEncoding enc) {
10854 xmlDtdPtr ret = NULL;
10855 xmlParserCtxtPtr ctxt;
10856 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010857 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010858
10859 if (input == NULL)
10860 return(NULL);
10861
10862 ctxt = xmlNewParserCtxt();
10863 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010864 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010865 return(NULL);
10866 }
10867
10868 /*
10869 * Set-up the SAX context
10870 */
10871 if (sax != NULL) {
10872 if (ctxt->sax != NULL)
10873 xmlFree(ctxt->sax);
10874 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010875 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010877 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010878
10879 /*
10880 * generate a parser input from the I/O handler
10881 */
10882
Daniel Veillard43caefb2003-12-07 19:32:22 +000010883 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010884 if (pinput == NULL) {
10885 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010886 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010887 xmlFreeParserCtxt(ctxt);
10888 return(NULL);
10889 }
10890
10891 /*
10892 * plug some encoding conversion routines here.
10893 */
10894 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010895 if (enc != XML_CHAR_ENCODING_NONE) {
10896 xmlSwitchEncoding(ctxt, enc);
10897 }
Owen Taylor3473f882001-02-23 17:55:21 +000010898
10899 pinput->filename = NULL;
10900 pinput->line = 1;
10901 pinput->col = 1;
10902 pinput->base = ctxt->input->cur;
10903 pinput->cur = ctxt->input->cur;
10904 pinput->free = NULL;
10905
10906 /*
10907 * let's parse that entity knowing it's an external subset.
10908 */
10909 ctxt->inSubset = 2;
10910 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10911 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10912 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010913
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010914 if ((enc == XML_CHAR_ENCODING_NONE) &&
10915 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010916 /*
10917 * Get the 4 first bytes and decode the charset
10918 * if enc != XML_CHAR_ENCODING_NONE
10919 * plug some encoding conversion routines.
10920 */
10921 start[0] = RAW;
10922 start[1] = NXT(1);
10923 start[2] = NXT(2);
10924 start[3] = NXT(3);
10925 enc = xmlDetectCharEncoding(start, 4);
10926 if (enc != XML_CHAR_ENCODING_NONE) {
10927 xmlSwitchEncoding(ctxt, enc);
10928 }
10929 }
10930
Owen Taylor3473f882001-02-23 17:55:21 +000010931 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10932
10933 if (ctxt->myDoc != NULL) {
10934 if (ctxt->wellFormed) {
10935 ret = ctxt->myDoc->extSubset;
10936 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010937 if (ret != NULL) {
10938 xmlNodePtr tmp;
10939
10940 ret->doc = NULL;
10941 tmp = ret->children;
10942 while (tmp != NULL) {
10943 tmp->doc = NULL;
10944 tmp = tmp->next;
10945 }
10946 }
Owen Taylor3473f882001-02-23 17:55:21 +000010947 } else {
10948 ret = NULL;
10949 }
10950 xmlFreeDoc(ctxt->myDoc);
10951 ctxt->myDoc = NULL;
10952 }
10953 if (sax != NULL) ctxt->sax = NULL;
10954 xmlFreeParserCtxt(ctxt);
10955
10956 return(ret);
10957}
10958
10959/**
10960 * xmlSAXParseDTD:
10961 * @sax: the SAX handler block
10962 * @ExternalID: a NAME* containing the External ID of the DTD
10963 * @SystemID: a NAME* containing the URL to the DTD
10964 *
10965 * Load and parse an external subset.
10966 *
10967 * Returns the resulting xmlDtdPtr or NULL in case of error.
10968 */
10969
10970xmlDtdPtr
10971xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10972 const xmlChar *SystemID) {
10973 xmlDtdPtr ret = NULL;
10974 xmlParserCtxtPtr ctxt;
10975 xmlParserInputPtr input = NULL;
10976 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010977 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010978
10979 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10980
10981 ctxt = xmlNewParserCtxt();
10982 if (ctxt == NULL) {
10983 return(NULL);
10984 }
10985
10986 /*
10987 * Set-up the SAX context
10988 */
10989 if (sax != NULL) {
10990 if (ctxt->sax != NULL)
10991 xmlFree(ctxt->sax);
10992 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010993 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010994 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010995
10996 /*
10997 * Canonicalise the system ID
10998 */
10999 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011000 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011001 xmlFreeParserCtxt(ctxt);
11002 return(NULL);
11003 }
Owen Taylor3473f882001-02-23 17:55:21 +000011004
11005 /*
11006 * Ask the Entity resolver to load the damn thing
11007 */
11008
11009 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011010 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11011 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011012 if (input == NULL) {
11013 if (sax != NULL) ctxt->sax = NULL;
11014 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011015 if (systemIdCanonic != NULL)
11016 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011017 return(NULL);
11018 }
11019
11020 /*
11021 * plug some encoding conversion routines here.
11022 */
11023 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011024 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11026 xmlSwitchEncoding(ctxt, enc);
11027 }
Owen Taylor3473f882001-02-23 17:55:21 +000011028
11029 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011030 input->filename = (char *) systemIdCanonic;
11031 else
11032 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011033 input->line = 1;
11034 input->col = 1;
11035 input->base = ctxt->input->cur;
11036 input->cur = ctxt->input->cur;
11037 input->free = NULL;
11038
11039 /*
11040 * let's parse that entity knowing it's an external subset.
11041 */
11042 ctxt->inSubset = 2;
11043 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11044 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11045 ExternalID, SystemID);
11046 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11047
11048 if (ctxt->myDoc != NULL) {
11049 if (ctxt->wellFormed) {
11050 ret = ctxt->myDoc->extSubset;
11051 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011052 if (ret != NULL) {
11053 xmlNodePtr tmp;
11054
11055 ret->doc = NULL;
11056 tmp = ret->children;
11057 while (tmp != NULL) {
11058 tmp->doc = NULL;
11059 tmp = tmp->next;
11060 }
11061 }
Owen Taylor3473f882001-02-23 17:55:21 +000011062 } else {
11063 ret = NULL;
11064 }
11065 xmlFreeDoc(ctxt->myDoc);
11066 ctxt->myDoc = NULL;
11067 }
11068 if (sax != NULL) ctxt->sax = NULL;
11069 xmlFreeParserCtxt(ctxt);
11070
11071 return(ret);
11072}
11073
Daniel Veillard4432df22003-09-28 18:58:27 +000011074
Owen Taylor3473f882001-02-23 17:55:21 +000011075/**
11076 * xmlParseDTD:
11077 * @ExternalID: a NAME* containing the External ID of the DTD
11078 * @SystemID: a NAME* containing the URL to the DTD
11079 *
11080 * Load and parse an external subset.
11081 *
11082 * Returns the resulting xmlDtdPtr or NULL in case of error.
11083 */
11084
11085xmlDtdPtr
11086xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11087 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11088}
Daniel Veillard4432df22003-09-28 18:58:27 +000011089#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011090
11091/************************************************************************
11092 * *
11093 * Front ends when parsing an Entity *
11094 * *
11095 ************************************************************************/
11096
11097/**
Owen Taylor3473f882001-02-23 17:55:21 +000011098 * xmlParseCtxtExternalEntity:
11099 * @ctx: the existing parsing context
11100 * @URL: the URL for the entity to load
11101 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011102 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011103 *
11104 * Parse an external general entity within an existing parsing context
11105 * An external general parsed entity is well-formed if it matches the
11106 * production labeled extParsedEnt.
11107 *
11108 * [78] extParsedEnt ::= TextDecl? content
11109 *
11110 * Returns 0 if the entity is well formed, -1 in case of args problem and
11111 * the parser error code otherwise
11112 */
11113
11114int
11115xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011116 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011117 xmlParserCtxtPtr ctxt;
11118 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011119 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011120 xmlSAXHandlerPtr oldsax = NULL;
11121 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011122 xmlChar start[4];
11123 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011124 xmlParserInputPtr inputStream;
11125 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011126
Daniel Veillardce682bc2004-11-05 17:22:25 +000011127 if (ctx == NULL) return(-1);
11128
Owen Taylor3473f882001-02-23 17:55:21 +000011129 if (ctx->depth > 40) {
11130 return(XML_ERR_ENTITY_LOOP);
11131 }
11132
Daniel Veillardcda96922001-08-21 10:56:31 +000011133 if (lst != NULL)
11134 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011135 if ((URL == NULL) && (ID == NULL))
11136 return(-1);
11137 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11138 return(-1);
11139
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011140 ctxt = xmlNewParserCtxt();
11141 if (ctxt == NULL) {
11142 return(-1);
11143 }
11144
Owen Taylor3473f882001-02-23 17:55:21 +000011145 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011146 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011147
11148 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11149 if (inputStream == NULL) {
11150 xmlFreeParserCtxt(ctxt);
11151 return(-1);
11152 }
11153
11154 inputPush(ctxt, inputStream);
11155
11156 if ((ctxt->directory == NULL) && (directory == NULL))
11157 directory = xmlParserGetDirectory((char *)URL);
11158 if ((ctxt->directory == NULL) && (directory != NULL))
11159 ctxt->directory = directory;
11160
Owen Taylor3473f882001-02-23 17:55:21 +000011161 oldsax = ctxt->sax;
11162 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011163 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011164 newDoc = xmlNewDoc(BAD_CAST "1.0");
11165 if (newDoc == NULL) {
11166 xmlFreeParserCtxt(ctxt);
11167 return(-1);
11168 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011169 if (ctx->myDoc->dict) {
11170 newDoc->dict = ctx->myDoc->dict;
11171 xmlDictReference(newDoc->dict);
11172 }
Owen Taylor3473f882001-02-23 17:55:21 +000011173 if (ctx->myDoc != NULL) {
11174 newDoc->intSubset = ctx->myDoc->intSubset;
11175 newDoc->extSubset = ctx->myDoc->extSubset;
11176 }
11177 if (ctx->myDoc->URL != NULL) {
11178 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11179 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011180 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11181 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011182 ctxt->sax = oldsax;
11183 xmlFreeParserCtxt(ctxt);
11184 newDoc->intSubset = NULL;
11185 newDoc->extSubset = NULL;
11186 xmlFreeDoc(newDoc);
11187 return(-1);
11188 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011189 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011190 nodePush(ctxt, newDoc->children);
11191 if (ctx->myDoc == NULL) {
11192 ctxt->myDoc = newDoc;
11193 } else {
11194 ctxt->myDoc = ctx->myDoc;
11195 newDoc->children->doc = ctx->myDoc;
11196 }
11197
Daniel Veillard87a764e2001-06-20 17:41:10 +000011198 /*
11199 * Get the 4 first bytes and decode the charset
11200 * if enc != XML_CHAR_ENCODING_NONE
11201 * plug some encoding conversion routines.
11202 */
11203 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011204 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11205 start[0] = RAW;
11206 start[1] = NXT(1);
11207 start[2] = NXT(2);
11208 start[3] = NXT(3);
11209 enc = xmlDetectCharEncoding(start, 4);
11210 if (enc != XML_CHAR_ENCODING_NONE) {
11211 xmlSwitchEncoding(ctxt, enc);
11212 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011213 }
11214
Owen Taylor3473f882001-02-23 17:55:21 +000011215 /*
11216 * Parse a possible text declaration first
11217 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011218 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011219 xmlParseTextDecl(ctxt);
11220 }
11221
11222 /*
11223 * Doing validity checking on chunk doesn't make sense
11224 */
11225 ctxt->instate = XML_PARSER_CONTENT;
11226 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011227 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011228 ctxt->loadsubset = ctx->loadsubset;
11229 ctxt->depth = ctx->depth + 1;
11230 ctxt->replaceEntities = ctx->replaceEntities;
11231 if (ctxt->validate) {
11232 ctxt->vctxt.error = ctx->vctxt.error;
11233 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011234 } else {
11235 ctxt->vctxt.error = NULL;
11236 ctxt->vctxt.warning = NULL;
11237 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011238 ctxt->vctxt.nodeTab = NULL;
11239 ctxt->vctxt.nodeNr = 0;
11240 ctxt->vctxt.nodeMax = 0;
11241 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011242 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11243 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011244 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11245 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11246 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011247 ctxt->dictNames = ctx->dictNames;
11248 ctxt->attsDefault = ctx->attsDefault;
11249 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011250 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011251
11252 xmlParseContent(ctxt);
11253
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011254 ctx->validate = ctxt->validate;
11255 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011256 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011257 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011258 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011259 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011260 }
11261 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011262 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011263 }
11264
11265 if (!ctxt->wellFormed) {
11266 if (ctxt->errNo == 0)
11267 ret = 1;
11268 else
11269 ret = ctxt->errNo;
11270 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011271 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011272 xmlNodePtr cur;
11273
11274 /*
11275 * Return the newly created nodeset after unlinking it from
11276 * they pseudo parent.
11277 */
11278 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011279 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011280 while (cur != NULL) {
11281 cur->parent = NULL;
11282 cur = cur->next;
11283 }
11284 newDoc->children->children = NULL;
11285 }
11286 ret = 0;
11287 }
11288 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011289 ctxt->dict = NULL;
11290 ctxt->attsDefault = NULL;
11291 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011292 xmlFreeParserCtxt(ctxt);
11293 newDoc->intSubset = NULL;
11294 newDoc->extSubset = NULL;
11295 xmlFreeDoc(newDoc);
11296
11297 return(ret);
11298}
11299
11300/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011301 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011302 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011303 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011304 * @sax: the SAX handler bloc (possibly NULL)
11305 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11306 * @depth: Used for loop detection, use 0
11307 * @URL: the URL for the entity to load
11308 * @ID: the System ID for the entity to load
11309 * @list: the return value for the set of parsed nodes
11310 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011311 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011312 *
11313 * Returns 0 if the entity is well formed, -1 in case of args problem and
11314 * the parser error code otherwise
11315 */
11316
Daniel Veillard7d515752003-09-26 19:12:37 +000011317static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011318xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11319 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011320 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011321 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011322 xmlParserCtxtPtr ctxt;
11323 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011324 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011325 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011326 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011327 xmlChar start[4];
11328 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011329
11330 if (depth > 40) {
11331 return(XML_ERR_ENTITY_LOOP);
11332 }
11333
11334
11335
11336 if (list != NULL)
11337 *list = NULL;
11338 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011339 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011340 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011341 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011342
11343
11344 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011345 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011346 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011347 if (oldctxt != NULL) {
11348 ctxt->_private = oldctxt->_private;
11349 ctxt->loadsubset = oldctxt->loadsubset;
11350 ctxt->validate = oldctxt->validate;
11351 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011352 ctxt->record_info = oldctxt->record_info;
11353 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11354 ctxt->node_seq.length = oldctxt->node_seq.length;
11355 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011356 } else {
11357 /*
11358 * Doing validity checking on chunk without context
11359 * doesn't make sense
11360 */
11361 ctxt->_private = NULL;
11362 ctxt->validate = 0;
11363 ctxt->external = 2;
11364 ctxt->loadsubset = 0;
11365 }
Owen Taylor3473f882001-02-23 17:55:21 +000011366 if (sax != NULL) {
11367 oldsax = ctxt->sax;
11368 ctxt->sax = sax;
11369 if (user_data != NULL)
11370 ctxt->userData = user_data;
11371 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011372 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011373 newDoc = xmlNewDoc(BAD_CAST "1.0");
11374 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011375 ctxt->node_seq.maximum = 0;
11376 ctxt->node_seq.length = 0;
11377 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011378 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011379 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011380 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011381 newDoc->intSubset = doc->intSubset;
11382 newDoc->extSubset = doc->extSubset;
11383 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011384 xmlDictReference(newDoc->dict);
11385
Owen Taylor3473f882001-02-23 17:55:21 +000011386 if (doc->URL != NULL) {
11387 newDoc->URL = xmlStrdup(doc->URL);
11388 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011389 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11390 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011391 if (sax != NULL)
11392 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011393 ctxt->node_seq.maximum = 0;
11394 ctxt->node_seq.length = 0;
11395 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011396 xmlFreeParserCtxt(ctxt);
11397 newDoc->intSubset = NULL;
11398 newDoc->extSubset = NULL;
11399 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011400 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011401 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011402 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011403 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011404 ctxt->myDoc = doc;
11405 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011406
Daniel Veillard87a764e2001-06-20 17:41:10 +000011407 /*
11408 * Get the 4 first bytes and decode the charset
11409 * if enc != XML_CHAR_ENCODING_NONE
11410 * plug some encoding conversion routines.
11411 */
11412 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011413 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11414 start[0] = RAW;
11415 start[1] = NXT(1);
11416 start[2] = NXT(2);
11417 start[3] = NXT(3);
11418 enc = xmlDetectCharEncoding(start, 4);
11419 if (enc != XML_CHAR_ENCODING_NONE) {
11420 xmlSwitchEncoding(ctxt, enc);
11421 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011422 }
11423
Owen Taylor3473f882001-02-23 17:55:21 +000011424 /*
11425 * Parse a possible text declaration first
11426 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011427 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011428 xmlParseTextDecl(ctxt);
11429 }
11430
Owen Taylor3473f882001-02-23 17:55:21 +000011431 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011432 ctxt->depth = depth;
11433
11434 xmlParseContent(ctxt);
11435
Daniel Veillard561b7f82002-03-20 21:55:57 +000011436 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011437 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011438 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011439 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011440 }
11441 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011443 }
11444
11445 if (!ctxt->wellFormed) {
11446 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011447 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011448 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011449 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011450 } else {
11451 if (list != NULL) {
11452 xmlNodePtr cur;
11453
11454 /*
11455 * Return the newly created nodeset after unlinking it from
11456 * they pseudo parent.
11457 */
11458 cur = newDoc->children->children;
11459 *list = cur;
11460 while (cur != NULL) {
11461 cur->parent = NULL;
11462 cur = cur->next;
11463 }
11464 newDoc->children->children = NULL;
11465 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011466 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011467 }
11468 if (sax != NULL)
11469 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011470 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11471 oldctxt->node_seq.length = ctxt->node_seq.length;
11472 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011473 ctxt->node_seq.maximum = 0;
11474 ctxt->node_seq.length = 0;
11475 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011476 xmlFreeParserCtxt(ctxt);
11477 newDoc->intSubset = NULL;
11478 newDoc->extSubset = NULL;
11479 xmlFreeDoc(newDoc);
11480
11481 return(ret);
11482}
11483
Daniel Veillard81273902003-09-30 00:43:48 +000011484#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011485/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011486 * xmlParseExternalEntity:
11487 * @doc: the document the chunk pertains to
11488 * @sax: the SAX handler bloc (possibly NULL)
11489 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11490 * @depth: Used for loop detection, use 0
11491 * @URL: the URL for the entity to load
11492 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011493 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011494 *
11495 * Parse an external general entity
11496 * An external general parsed entity is well-formed if it matches the
11497 * production labeled extParsedEnt.
11498 *
11499 * [78] extParsedEnt ::= TextDecl? content
11500 *
11501 * Returns 0 if the entity is well formed, -1 in case of args problem and
11502 * the parser error code otherwise
11503 */
11504
11505int
11506xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011507 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011508 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011509 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011510}
11511
11512/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011513 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011514 * @doc: the document the chunk pertains to
11515 * @sax: the SAX handler bloc (possibly NULL)
11516 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11517 * @depth: Used for loop detection, use 0
11518 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011519 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011520 *
11521 * Parse a well-balanced chunk of an XML document
11522 * called by the parser
11523 * The allowed sequence for the Well Balanced Chunk is the one defined by
11524 * the content production in the XML grammar:
11525 *
11526 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11527 *
11528 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11529 * the parser error code otherwise
11530 */
11531
11532int
11533xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011534 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011535 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11536 depth, string, lst, 0 );
11537}
Daniel Veillard81273902003-09-30 00:43:48 +000011538#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011539
11540/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011541 * xmlParseBalancedChunkMemoryInternal:
11542 * @oldctxt: the existing parsing context
11543 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11544 * @user_data: the user data field for the parser context
11545 * @lst: the return value for the set of parsed nodes
11546 *
11547 *
11548 * Parse a well-balanced chunk of an XML document
11549 * called by the parser
11550 * The allowed sequence for the Well Balanced Chunk is the one defined by
11551 * the content production in the XML grammar:
11552 *
11553 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11554 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011555 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11556 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011557 *
11558 * In case recover is set to 1, the nodelist will not be empty even if
11559 * the parsed chunk is not well balanced.
11560 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011561static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011562xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11563 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11564 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011565 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011566 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011567 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011568 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011569 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011570 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011571 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011572
11573 if (oldctxt->depth > 40) {
11574 return(XML_ERR_ENTITY_LOOP);
11575 }
11576
11577
11578 if (lst != NULL)
11579 *lst = NULL;
11580 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011581 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011582
11583 size = xmlStrlen(string);
11584
11585 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011586 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011587 if (user_data != NULL)
11588 ctxt->userData = user_data;
11589 else
11590 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011591 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11592 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011593 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11594 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11595 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011596
11597 oldsax = ctxt->sax;
11598 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011599 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011600 ctxt->replaceEntities = oldctxt->replaceEntities;
11601 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011602
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011603 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011604 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011605 newDoc = xmlNewDoc(BAD_CAST "1.0");
11606 if (newDoc == NULL) {
11607 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011608 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011609 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011610 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011611 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011612 newDoc->dict = ctxt->dict;
11613 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011614 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011615 } else {
11616 ctxt->myDoc = oldctxt->myDoc;
11617 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011618 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011619 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011620 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11621 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011622 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011623 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011624 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011625 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011626 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011627 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011628 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011629 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011630 ctxt->myDoc->children = NULL;
11631 ctxt->myDoc->last = NULL;
11632 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011633 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011634 ctxt->instate = XML_PARSER_CONTENT;
11635 ctxt->depth = oldctxt->depth + 1;
11636
Daniel Veillard328f48c2002-11-15 15:24:34 +000011637 ctxt->validate = 0;
11638 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011639 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11640 /*
11641 * ID/IDREF registration will be done in xmlValidateElement below
11642 */
11643 ctxt->loadsubset |= XML_SKIP_IDS;
11644 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011645 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011646 ctxt->attsDefault = oldctxt->attsDefault;
11647 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011648
Daniel Veillard68e9e742002-11-16 15:35:11 +000011649 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011650 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011651 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011652 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011653 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011654 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011655 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011656 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011657 }
11658
11659 if (!ctxt->wellFormed) {
11660 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011661 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011662 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011663 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011664 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011665 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011666 }
11667
William M. Brack7b9154b2003-09-27 19:23:50 +000011668 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011669 xmlNodePtr cur;
11670
11671 /*
11672 * Return the newly created nodeset after unlinking it from
11673 * they pseudo parent.
11674 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011675 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011676 *lst = cur;
11677 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011678#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011679 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11680 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11681 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011682 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11683 oldctxt->myDoc, cur);
11684 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011685#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011686 cur->parent = NULL;
11687 cur = cur->next;
11688 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011689 ctxt->myDoc->children->children = NULL;
11690 }
11691 if (ctxt->myDoc != NULL) {
11692 xmlFreeNode(ctxt->myDoc->children);
11693 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011694 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011695 }
11696
11697 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011698 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011699 ctxt->attsDefault = NULL;
11700 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011701 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011702 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011703 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011704 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011705
11706 return(ret);
11707}
11708
Daniel Veillard29b17482004-08-16 00:39:03 +000011709/**
11710 * xmlParseInNodeContext:
11711 * @node: the context node
11712 * @data: the input string
11713 * @datalen: the input string length in bytes
11714 * @options: a combination of xmlParserOption
11715 * @lst: the return value for the set of parsed nodes
11716 *
11717 * Parse a well-balanced chunk of an XML document
11718 * within the context (DTD, namespaces, etc ...) of the given node.
11719 *
11720 * The allowed sequence for the data is a Well Balanced Chunk defined by
11721 * the content production in the XML grammar:
11722 *
11723 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11724 *
11725 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11726 * error code otherwise
11727 */
11728xmlParserErrors
11729xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11730 int options, xmlNodePtr *lst) {
11731#ifdef SAX2
11732 xmlParserCtxtPtr ctxt;
11733 xmlDocPtr doc = NULL;
11734 xmlNodePtr fake, cur;
11735 int nsnr = 0;
11736
11737 xmlParserErrors ret = XML_ERR_OK;
11738
11739 /*
11740 * check all input parameters, grab the document
11741 */
11742 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11743 return(XML_ERR_INTERNAL_ERROR);
11744 switch (node->type) {
11745 case XML_ELEMENT_NODE:
11746 case XML_ATTRIBUTE_NODE:
11747 case XML_TEXT_NODE:
11748 case XML_CDATA_SECTION_NODE:
11749 case XML_ENTITY_REF_NODE:
11750 case XML_PI_NODE:
11751 case XML_COMMENT_NODE:
11752 case XML_DOCUMENT_NODE:
11753 case XML_HTML_DOCUMENT_NODE:
11754 break;
11755 default:
11756 return(XML_ERR_INTERNAL_ERROR);
11757
11758 }
11759 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11760 (node->type != XML_DOCUMENT_NODE) &&
11761 (node->type != XML_HTML_DOCUMENT_NODE))
11762 node = node->parent;
11763 if (node == NULL)
11764 return(XML_ERR_INTERNAL_ERROR);
11765 if (node->type == XML_ELEMENT_NODE)
11766 doc = node->doc;
11767 else
11768 doc = (xmlDocPtr) node;
11769 if (doc == NULL)
11770 return(XML_ERR_INTERNAL_ERROR);
11771
11772 /*
11773 * allocate a context and set-up everything not related to the
11774 * node position in the tree
11775 */
11776 if (doc->type == XML_DOCUMENT_NODE)
11777 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11778#ifdef LIBXML_HTML_ENABLED
11779 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11780 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11781#endif
11782 else
11783 return(XML_ERR_INTERNAL_ERROR);
11784
11785 if (ctxt == NULL)
11786 return(XML_ERR_NO_MEMORY);
11787 fake = xmlNewComment(NULL);
11788 if (fake == NULL) {
11789 xmlFreeParserCtxt(ctxt);
11790 return(XML_ERR_NO_MEMORY);
11791 }
11792 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011793
11794 /*
11795 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11796 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11797 * we must wait until the last moment to free the original one.
11798 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011799 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011800 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011801 xmlDictFree(ctxt->dict);
11802 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011803 } else
11804 options |= XML_PARSE_NODICT;
11805
11806 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011807 xmlDetectSAX2(ctxt);
11808 ctxt->myDoc = doc;
11809
11810 if (node->type == XML_ELEMENT_NODE) {
11811 nodePush(ctxt, node);
11812 /*
11813 * initialize the SAX2 namespaces stack
11814 */
11815 cur = node;
11816 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11817 xmlNsPtr ns = cur->nsDef;
11818 const xmlChar *iprefix, *ihref;
11819
11820 while (ns != NULL) {
11821 if (ctxt->dict) {
11822 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11823 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11824 } else {
11825 iprefix = ns->prefix;
11826 ihref = ns->href;
11827 }
11828
11829 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11830 nsPush(ctxt, iprefix, ihref);
11831 nsnr++;
11832 }
11833 ns = ns->next;
11834 }
11835 cur = cur->parent;
11836 }
11837 ctxt->instate = XML_PARSER_CONTENT;
11838 }
11839
11840 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11841 /*
11842 * ID/IDREF registration will be done in xmlValidateElement below
11843 */
11844 ctxt->loadsubset |= XML_SKIP_IDS;
11845 }
11846
Daniel Veillard499cc922006-01-18 17:22:35 +000011847#ifdef LIBXML_HTML_ENABLED
11848 if (doc->type == XML_HTML_DOCUMENT_NODE)
11849 __htmlParseContent(ctxt);
11850 else
11851#endif
11852 xmlParseContent(ctxt);
11853
Daniel Veillard29b17482004-08-16 00:39:03 +000011854 nsPop(ctxt, nsnr);
11855 if ((RAW == '<') && (NXT(1) == '/')) {
11856 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11857 } else if (RAW != 0) {
11858 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11859 }
11860 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11861 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11862 ctxt->wellFormed = 0;
11863 }
11864
11865 if (!ctxt->wellFormed) {
11866 if (ctxt->errNo == 0)
11867 ret = XML_ERR_INTERNAL_ERROR;
11868 else
11869 ret = (xmlParserErrors)ctxt->errNo;
11870 } else {
11871 ret = XML_ERR_OK;
11872 }
11873
11874 /*
11875 * Return the newly created nodeset after unlinking it from
11876 * the pseudo sibling.
11877 */
11878
11879 cur = fake->next;
11880 fake->next = NULL;
11881 node->last = fake;
11882
11883 if (cur != NULL) {
11884 cur->prev = NULL;
11885 }
11886
11887 *lst = cur;
11888
11889 while (cur != NULL) {
11890 cur->parent = NULL;
11891 cur = cur->next;
11892 }
11893
11894 xmlUnlinkNode(fake);
11895 xmlFreeNode(fake);
11896
11897
11898 if (ret != XML_ERR_OK) {
11899 xmlFreeNodeList(*lst);
11900 *lst = NULL;
11901 }
William M. Brackc3f81342004-10-03 01:22:44 +000011902
William M. Brackb7b54de2004-10-06 16:38:01 +000011903 if (doc->dict != NULL)
11904 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011905 xmlFreeParserCtxt(ctxt);
11906
11907 return(ret);
11908#else /* !SAX2 */
11909 return(XML_ERR_INTERNAL_ERROR);
11910#endif
11911}
11912
Daniel Veillard81273902003-09-30 00:43:48 +000011913#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011914/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011915 * xmlParseBalancedChunkMemoryRecover:
11916 * @doc: the document the chunk pertains to
11917 * @sax: the SAX handler bloc (possibly NULL)
11918 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11919 * @depth: Used for loop detection, use 0
11920 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11921 * @lst: the return value for the set of parsed nodes
11922 * @recover: return nodes even if the data is broken (use 0)
11923 *
11924 *
11925 * Parse a well-balanced chunk of an XML document
11926 * called by the parser
11927 * The allowed sequence for the Well Balanced Chunk is the one defined by
11928 * the content production in the XML grammar:
11929 *
11930 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11931 *
11932 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11933 * the parser error code otherwise
11934 *
11935 * In case recover is set to 1, the nodelist will not be empty even if
11936 * the parsed chunk is not well balanced.
11937 */
11938int
11939xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11940 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11941 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011942 xmlParserCtxtPtr ctxt;
11943 xmlDocPtr newDoc;
11944 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011945 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011946 int size;
11947 int ret = 0;
11948
11949 if (depth > 40) {
11950 return(XML_ERR_ENTITY_LOOP);
11951 }
11952
11953
Daniel Veillardcda96922001-08-21 10:56:31 +000011954 if (lst != NULL)
11955 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011956 if (string == NULL)
11957 return(-1);
11958
11959 size = xmlStrlen(string);
11960
11961 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11962 if (ctxt == NULL) return(-1);
11963 ctxt->userData = ctxt;
11964 if (sax != NULL) {
11965 oldsax = ctxt->sax;
11966 ctxt->sax = sax;
11967 if (user_data != NULL)
11968 ctxt->userData = user_data;
11969 }
11970 newDoc = xmlNewDoc(BAD_CAST "1.0");
11971 if (newDoc == NULL) {
11972 xmlFreeParserCtxt(ctxt);
11973 return(-1);
11974 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011975 if ((doc != NULL) && (doc->dict != NULL)) {
11976 xmlDictFree(ctxt->dict);
11977 ctxt->dict = doc->dict;
11978 xmlDictReference(ctxt->dict);
11979 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11980 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11981 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11982 ctxt->dictNames = 1;
11983 } else {
11984 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11985 }
Owen Taylor3473f882001-02-23 17:55:21 +000011986 if (doc != NULL) {
11987 newDoc->intSubset = doc->intSubset;
11988 newDoc->extSubset = doc->extSubset;
11989 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011990 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11991 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011992 if (sax != NULL)
11993 ctxt->sax = oldsax;
11994 xmlFreeParserCtxt(ctxt);
11995 newDoc->intSubset = NULL;
11996 newDoc->extSubset = NULL;
11997 xmlFreeDoc(newDoc);
11998 return(-1);
11999 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012000 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12001 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012002 if (doc == NULL) {
12003 ctxt->myDoc = newDoc;
12004 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012005 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012006 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012007 /* Ensure that doc has XML spec namespace */
12008 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12009 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012010 }
12011 ctxt->instate = XML_PARSER_CONTENT;
12012 ctxt->depth = depth;
12013
12014 /*
12015 * Doing validity checking on chunk doesn't make sense
12016 */
12017 ctxt->validate = 0;
12018 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012019 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012020
Daniel Veillardb39bc392002-10-26 19:29:51 +000012021 if ( doc != NULL ){
12022 content = doc->children;
12023 doc->children = NULL;
12024 xmlParseContent(ctxt);
12025 doc->children = content;
12026 }
12027 else {
12028 xmlParseContent(ctxt);
12029 }
Owen Taylor3473f882001-02-23 17:55:21 +000012030 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012031 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012032 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012033 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012034 }
12035 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012036 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012037 }
12038
12039 if (!ctxt->wellFormed) {
12040 if (ctxt->errNo == 0)
12041 ret = 1;
12042 else
12043 ret = ctxt->errNo;
12044 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012045 ret = 0;
12046 }
12047
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012048 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12049 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012050
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012051 /*
12052 * Return the newly created nodeset after unlinking it from
12053 * they pseudo parent.
12054 */
12055 cur = newDoc->children->children;
12056 *lst = cur;
12057 while (cur != NULL) {
12058 xmlSetTreeDoc(cur, doc);
12059 cur->parent = NULL;
12060 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012061 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012062 newDoc->children->children = NULL;
12063 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012064
Owen Taylor3473f882001-02-23 17:55:21 +000012065 if (sax != NULL)
12066 ctxt->sax = oldsax;
12067 xmlFreeParserCtxt(ctxt);
12068 newDoc->intSubset = NULL;
12069 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012070 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012071 xmlFreeDoc(newDoc);
12072
12073 return(ret);
12074}
12075
12076/**
12077 * xmlSAXParseEntity:
12078 * @sax: the SAX handler block
12079 * @filename: the filename
12080 *
12081 * parse an XML external entity out of context and build a tree.
12082 * It use the given SAX function block to handle the parsing callback.
12083 * If sax is NULL, fallback to the default DOM tree building routines.
12084 *
12085 * [78] extParsedEnt ::= TextDecl? content
12086 *
12087 * This correspond to a "Well Balanced" chunk
12088 *
12089 * Returns the resulting document tree
12090 */
12091
12092xmlDocPtr
12093xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12094 xmlDocPtr ret;
12095 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012096
12097 ctxt = xmlCreateFileParserCtxt(filename);
12098 if (ctxt == NULL) {
12099 return(NULL);
12100 }
12101 if (sax != NULL) {
12102 if (ctxt->sax != NULL)
12103 xmlFree(ctxt->sax);
12104 ctxt->sax = sax;
12105 ctxt->userData = NULL;
12106 }
12107
Owen Taylor3473f882001-02-23 17:55:21 +000012108 xmlParseExtParsedEnt(ctxt);
12109
12110 if (ctxt->wellFormed)
12111 ret = ctxt->myDoc;
12112 else {
12113 ret = NULL;
12114 xmlFreeDoc(ctxt->myDoc);
12115 ctxt->myDoc = NULL;
12116 }
12117 if (sax != NULL)
12118 ctxt->sax = NULL;
12119 xmlFreeParserCtxt(ctxt);
12120
12121 return(ret);
12122}
12123
12124/**
12125 * xmlParseEntity:
12126 * @filename: the filename
12127 *
12128 * parse an XML external entity out of context and build a tree.
12129 *
12130 * [78] extParsedEnt ::= TextDecl? content
12131 *
12132 * This correspond to a "Well Balanced" chunk
12133 *
12134 * Returns the resulting document tree
12135 */
12136
12137xmlDocPtr
12138xmlParseEntity(const char *filename) {
12139 return(xmlSAXParseEntity(NULL, filename));
12140}
Daniel Veillard81273902003-09-30 00:43:48 +000012141#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012142
12143/**
12144 * xmlCreateEntityParserCtxt:
12145 * @URL: the entity URL
12146 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012147 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012148 *
12149 * Create a parser context for an external entity
12150 * Automatic support for ZLIB/Compress compressed document is provided
12151 * by default if found at compile-time.
12152 *
12153 * Returns the new parser context or NULL
12154 */
12155xmlParserCtxtPtr
12156xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12157 const xmlChar *base) {
12158 xmlParserCtxtPtr ctxt;
12159 xmlParserInputPtr inputStream;
12160 char *directory = NULL;
12161 xmlChar *uri;
12162
12163 ctxt = xmlNewParserCtxt();
12164 if (ctxt == NULL) {
12165 return(NULL);
12166 }
12167
12168 uri = xmlBuildURI(URL, base);
12169
12170 if (uri == NULL) {
12171 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12172 if (inputStream == NULL) {
12173 xmlFreeParserCtxt(ctxt);
12174 return(NULL);
12175 }
12176
12177 inputPush(ctxt, inputStream);
12178
12179 if ((ctxt->directory == NULL) && (directory == NULL))
12180 directory = xmlParserGetDirectory((char *)URL);
12181 if ((ctxt->directory == NULL) && (directory != NULL))
12182 ctxt->directory = directory;
12183 } else {
12184 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12185 if (inputStream == NULL) {
12186 xmlFree(uri);
12187 xmlFreeParserCtxt(ctxt);
12188 return(NULL);
12189 }
12190
12191 inputPush(ctxt, inputStream);
12192
12193 if ((ctxt->directory == NULL) && (directory == NULL))
12194 directory = xmlParserGetDirectory((char *)uri);
12195 if ((ctxt->directory == NULL) && (directory != NULL))
12196 ctxt->directory = directory;
12197 xmlFree(uri);
12198 }
Owen Taylor3473f882001-02-23 17:55:21 +000012199 return(ctxt);
12200}
12201
12202/************************************************************************
12203 * *
12204 * Front ends when parsing from a file *
12205 * *
12206 ************************************************************************/
12207
12208/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012209 * xmlCreateURLParserCtxt:
12210 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012211 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012212 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012213 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012214 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012215 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012216 *
12217 * Returns the new parser context or NULL
12218 */
12219xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012220xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012221{
12222 xmlParserCtxtPtr ctxt;
12223 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012224 char *directory = NULL;
12225
Owen Taylor3473f882001-02-23 17:55:21 +000012226 ctxt = xmlNewParserCtxt();
12227 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012228 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012229 return(NULL);
12230 }
12231
Daniel Veillarddf292f72005-01-16 19:00:15 +000012232 if (options)
12233 xmlCtxtUseOptions(ctxt, options);
12234 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012235
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012236 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012237 if (inputStream == NULL) {
12238 xmlFreeParserCtxt(ctxt);
12239 return(NULL);
12240 }
12241
Owen Taylor3473f882001-02-23 17:55:21 +000012242 inputPush(ctxt, inputStream);
12243 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012244 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012245 if ((ctxt->directory == NULL) && (directory != NULL))
12246 ctxt->directory = directory;
12247
12248 return(ctxt);
12249}
12250
Daniel Veillard61b93382003-11-03 14:28:31 +000012251/**
12252 * xmlCreateFileParserCtxt:
12253 * @filename: the filename
12254 *
12255 * Create a parser context for a file content.
12256 * Automatic support for ZLIB/Compress compressed document is provided
12257 * by default if found at compile-time.
12258 *
12259 * Returns the new parser context or NULL
12260 */
12261xmlParserCtxtPtr
12262xmlCreateFileParserCtxt(const char *filename)
12263{
12264 return(xmlCreateURLParserCtxt(filename, 0));
12265}
12266
Daniel Veillard81273902003-09-30 00:43:48 +000012267#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012268/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012269 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012270 * @sax: the SAX handler block
12271 * @filename: the filename
12272 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12273 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012274 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012275 *
12276 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12277 * compressed document is provided by default if found at compile-time.
12278 * It use the given SAX function block to handle the parsing callback.
12279 * If sax is NULL, fallback to the default DOM tree building routines.
12280 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012281 * User data (void *) is stored within the parser context in the
12282 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012283 *
Owen Taylor3473f882001-02-23 17:55:21 +000012284 * Returns the resulting document tree
12285 */
12286
12287xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012288xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12289 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012290 xmlDocPtr ret;
12291 xmlParserCtxtPtr ctxt;
12292 char *directory = NULL;
12293
Daniel Veillard635ef722001-10-29 11:48:19 +000012294 xmlInitParser();
12295
Owen Taylor3473f882001-02-23 17:55:21 +000012296 ctxt = xmlCreateFileParserCtxt(filename);
12297 if (ctxt == NULL) {
12298 return(NULL);
12299 }
12300 if (sax != NULL) {
12301 if (ctxt->sax != NULL)
12302 xmlFree(ctxt->sax);
12303 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012304 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012305 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012306 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012307 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012308 }
Owen Taylor3473f882001-02-23 17:55:21 +000012309
12310 if ((ctxt->directory == NULL) && (directory == NULL))
12311 directory = xmlParserGetDirectory(filename);
12312 if ((ctxt->directory == NULL) && (directory != NULL))
12313 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12314
Daniel Veillarddad3f682002-11-17 16:47:27 +000012315 ctxt->recovery = recovery;
12316
Owen Taylor3473f882001-02-23 17:55:21 +000012317 xmlParseDocument(ctxt);
12318
William M. Brackc07329e2003-09-08 01:57:30 +000012319 if ((ctxt->wellFormed) || recovery) {
12320 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012321 if (ret != NULL) {
12322 if (ctxt->input->buf->compressed > 0)
12323 ret->compression = 9;
12324 else
12325 ret->compression = ctxt->input->buf->compressed;
12326 }
William M. Brackc07329e2003-09-08 01:57:30 +000012327 }
Owen Taylor3473f882001-02-23 17:55:21 +000012328 else {
12329 ret = NULL;
12330 xmlFreeDoc(ctxt->myDoc);
12331 ctxt->myDoc = NULL;
12332 }
12333 if (sax != NULL)
12334 ctxt->sax = NULL;
12335 xmlFreeParserCtxt(ctxt);
12336
12337 return(ret);
12338}
12339
12340/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012341 * xmlSAXParseFile:
12342 * @sax: the SAX handler block
12343 * @filename: the filename
12344 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12345 * documents
12346 *
12347 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12348 * compressed document is provided by default if found at compile-time.
12349 * It use the given SAX function block to handle the parsing callback.
12350 * If sax is NULL, fallback to the default DOM tree building routines.
12351 *
12352 * Returns the resulting document tree
12353 */
12354
12355xmlDocPtr
12356xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12357 int recovery) {
12358 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12359}
12360
12361/**
Owen Taylor3473f882001-02-23 17:55:21 +000012362 * xmlRecoverDoc:
12363 * @cur: a pointer to an array of xmlChar
12364 *
12365 * parse an XML in-memory document and build a tree.
12366 * In the case the document is not Well Formed, a tree is built anyway
12367 *
12368 * Returns the resulting document tree
12369 */
12370
12371xmlDocPtr
12372xmlRecoverDoc(xmlChar *cur) {
12373 return(xmlSAXParseDoc(NULL, cur, 1));
12374}
12375
12376/**
12377 * xmlParseFile:
12378 * @filename: the filename
12379 *
12380 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12381 * compressed document is provided by default if found at compile-time.
12382 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012383 * Returns the resulting document tree if the file was wellformed,
12384 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012385 */
12386
12387xmlDocPtr
12388xmlParseFile(const char *filename) {
12389 return(xmlSAXParseFile(NULL, filename, 0));
12390}
12391
12392/**
12393 * xmlRecoverFile:
12394 * @filename: the filename
12395 *
12396 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12397 * compressed document is provided by default if found at compile-time.
12398 * In the case the document is not Well Formed, a tree is built anyway
12399 *
12400 * Returns the resulting document tree
12401 */
12402
12403xmlDocPtr
12404xmlRecoverFile(const char *filename) {
12405 return(xmlSAXParseFile(NULL, filename, 1));
12406}
12407
12408
12409/**
12410 * xmlSetupParserForBuffer:
12411 * @ctxt: an XML parser context
12412 * @buffer: a xmlChar * buffer
12413 * @filename: a file name
12414 *
12415 * Setup the parser context to parse a new buffer; Clears any prior
12416 * contents from the parser context. The buffer parameter must not be
12417 * NULL, but the filename parameter can be
12418 */
12419void
12420xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12421 const char* filename)
12422{
12423 xmlParserInputPtr input;
12424
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012425 if ((ctxt == NULL) || (buffer == NULL))
12426 return;
12427
Owen Taylor3473f882001-02-23 17:55:21 +000012428 input = xmlNewInputStream(ctxt);
12429 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012430 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012431 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012432 return;
12433 }
12434
12435 xmlClearParserCtxt(ctxt);
12436 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012437 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012438 input->base = buffer;
12439 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012440 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012441 inputPush(ctxt, input);
12442}
12443
12444/**
12445 * xmlSAXUserParseFile:
12446 * @sax: a SAX handler
12447 * @user_data: The user data returned on SAX callbacks
12448 * @filename: a file name
12449 *
12450 * parse an XML file and call the given SAX handler routines.
12451 * Automatic support for ZLIB/Compress compressed document is provided
12452 *
12453 * Returns 0 in case of success or a error number otherwise
12454 */
12455int
12456xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12457 const char *filename) {
12458 int ret = 0;
12459 xmlParserCtxtPtr ctxt;
12460
12461 ctxt = xmlCreateFileParserCtxt(filename);
12462 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012463#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012464 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012465#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012466 xmlFree(ctxt->sax);
12467 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012468 xmlDetectSAX2(ctxt);
12469
Owen Taylor3473f882001-02-23 17:55:21 +000012470 if (user_data != NULL)
12471 ctxt->userData = user_data;
12472
12473 xmlParseDocument(ctxt);
12474
12475 if (ctxt->wellFormed)
12476 ret = 0;
12477 else {
12478 if (ctxt->errNo != 0)
12479 ret = ctxt->errNo;
12480 else
12481 ret = -1;
12482 }
12483 if (sax != NULL)
12484 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012485 if (ctxt->myDoc != NULL) {
12486 xmlFreeDoc(ctxt->myDoc);
12487 ctxt->myDoc = NULL;
12488 }
Owen Taylor3473f882001-02-23 17:55:21 +000012489 xmlFreeParserCtxt(ctxt);
12490
12491 return ret;
12492}
Daniel Veillard81273902003-09-30 00:43:48 +000012493#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012494
12495/************************************************************************
12496 * *
12497 * Front ends when parsing from memory *
12498 * *
12499 ************************************************************************/
12500
12501/**
12502 * xmlCreateMemoryParserCtxt:
12503 * @buffer: a pointer to a char array
12504 * @size: the size of the array
12505 *
12506 * Create a parser context for an XML in-memory document.
12507 *
12508 * Returns the new parser context or NULL
12509 */
12510xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012511xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012512 xmlParserCtxtPtr ctxt;
12513 xmlParserInputPtr input;
12514 xmlParserInputBufferPtr buf;
12515
12516 if (buffer == NULL)
12517 return(NULL);
12518 if (size <= 0)
12519 return(NULL);
12520
12521 ctxt = xmlNewParserCtxt();
12522 if (ctxt == NULL)
12523 return(NULL);
12524
Daniel Veillard53350552003-09-18 13:35:51 +000012525 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012526 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012527 if (buf == NULL) {
12528 xmlFreeParserCtxt(ctxt);
12529 return(NULL);
12530 }
Owen Taylor3473f882001-02-23 17:55:21 +000012531
12532 input = xmlNewInputStream(ctxt);
12533 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012534 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012535 xmlFreeParserCtxt(ctxt);
12536 return(NULL);
12537 }
12538
12539 input->filename = NULL;
12540 input->buf = buf;
12541 input->base = input->buf->buffer->content;
12542 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012543 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012544
12545 inputPush(ctxt, input);
12546 return(ctxt);
12547}
12548
Daniel Veillard81273902003-09-30 00:43:48 +000012549#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012550/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012551 * xmlSAXParseMemoryWithData:
12552 * @sax: the SAX handler block
12553 * @buffer: an pointer to a char array
12554 * @size: the size of the array
12555 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12556 * documents
12557 * @data: the userdata
12558 *
12559 * parse an XML in-memory block and use the given SAX function block
12560 * to handle the parsing callback. If sax is NULL, fallback to the default
12561 * DOM tree building routines.
12562 *
12563 * User data (void *) is stored within the parser context in the
12564 * context's _private member, so it is available nearly everywhere in libxml
12565 *
12566 * Returns the resulting document tree
12567 */
12568
12569xmlDocPtr
12570xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12571 int size, int recovery, void *data) {
12572 xmlDocPtr ret;
12573 xmlParserCtxtPtr ctxt;
12574
12575 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12576 if (ctxt == NULL) return(NULL);
12577 if (sax != NULL) {
12578 if (ctxt->sax != NULL)
12579 xmlFree(ctxt->sax);
12580 ctxt->sax = sax;
12581 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012582 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012583 if (data!=NULL) {
12584 ctxt->_private=data;
12585 }
12586
Daniel Veillardadba5f12003-04-04 16:09:01 +000012587 ctxt->recovery = recovery;
12588
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012589 xmlParseDocument(ctxt);
12590
12591 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12592 else {
12593 ret = NULL;
12594 xmlFreeDoc(ctxt->myDoc);
12595 ctxt->myDoc = NULL;
12596 }
12597 if (sax != NULL)
12598 ctxt->sax = NULL;
12599 xmlFreeParserCtxt(ctxt);
12600
12601 return(ret);
12602}
12603
12604/**
Owen Taylor3473f882001-02-23 17:55:21 +000012605 * xmlSAXParseMemory:
12606 * @sax: the SAX handler block
12607 * @buffer: an pointer to a char array
12608 * @size: the size of the array
12609 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12610 * documents
12611 *
12612 * parse an XML in-memory block and use the given SAX function block
12613 * to handle the parsing callback. If sax is NULL, fallback to the default
12614 * DOM tree building routines.
12615 *
12616 * Returns the resulting document tree
12617 */
12618xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012619xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12620 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012621 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012622}
12623
12624/**
12625 * xmlParseMemory:
12626 * @buffer: an pointer to a char array
12627 * @size: the size of the array
12628 *
12629 * parse an XML in-memory block and build a tree.
12630 *
12631 * Returns the resulting document tree
12632 */
12633
Daniel Veillard50822cb2001-07-26 20:05:51 +000012634xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012635 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12636}
12637
12638/**
12639 * xmlRecoverMemory:
12640 * @buffer: an pointer to a char array
12641 * @size: the size of the array
12642 *
12643 * parse an XML in-memory block and build a tree.
12644 * In the case the document is not Well Formed, a tree is built anyway
12645 *
12646 * Returns the resulting document tree
12647 */
12648
Daniel Veillard50822cb2001-07-26 20:05:51 +000012649xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012650 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12651}
12652
12653/**
12654 * xmlSAXUserParseMemory:
12655 * @sax: a SAX handler
12656 * @user_data: The user data returned on SAX callbacks
12657 * @buffer: an in-memory XML document input
12658 * @size: the length of the XML document in bytes
12659 *
12660 * A better SAX parsing routine.
12661 * parse an XML in-memory buffer and call the given SAX handler routines.
12662 *
12663 * Returns 0 in case of success or a error number otherwise
12664 */
12665int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012666 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012667 int ret = 0;
12668 xmlParserCtxtPtr ctxt;
12669 xmlSAXHandlerPtr oldsax = NULL;
12670
Daniel Veillard9e923512002-08-14 08:48:52 +000012671 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012672 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12673 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012674 oldsax = ctxt->sax;
12675 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012676 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012677 if (user_data != NULL)
12678 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012679
12680 xmlParseDocument(ctxt);
12681
12682 if (ctxt->wellFormed)
12683 ret = 0;
12684 else {
12685 if (ctxt->errNo != 0)
12686 ret = ctxt->errNo;
12687 else
12688 ret = -1;
12689 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012690 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012691 if (ctxt->myDoc != NULL) {
12692 xmlFreeDoc(ctxt->myDoc);
12693 ctxt->myDoc = NULL;
12694 }
Owen Taylor3473f882001-02-23 17:55:21 +000012695 xmlFreeParserCtxt(ctxt);
12696
12697 return ret;
12698}
Daniel Veillard81273902003-09-30 00:43:48 +000012699#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012700
12701/**
12702 * xmlCreateDocParserCtxt:
12703 * @cur: a pointer to an array of xmlChar
12704 *
12705 * Creates a parser context for an XML in-memory document.
12706 *
12707 * Returns the new parser context or NULL
12708 */
12709xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012710xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012711 int len;
12712
12713 if (cur == NULL)
12714 return(NULL);
12715 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012716 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012717}
12718
Daniel Veillard81273902003-09-30 00:43:48 +000012719#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012720/**
12721 * xmlSAXParseDoc:
12722 * @sax: the SAX handler block
12723 * @cur: a pointer to an array of xmlChar
12724 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12725 * documents
12726 *
12727 * parse an XML in-memory document and build a tree.
12728 * It use the given SAX function block to handle the parsing callback.
12729 * If sax is NULL, fallback to the default DOM tree building routines.
12730 *
12731 * Returns the resulting document tree
12732 */
12733
12734xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012735xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012736 xmlDocPtr ret;
12737 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012738 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012739
Daniel Veillard38936062004-11-04 17:45:11 +000012740 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012741
12742
12743 ctxt = xmlCreateDocParserCtxt(cur);
12744 if (ctxt == NULL) return(NULL);
12745 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012746 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012747 ctxt->sax = sax;
12748 ctxt->userData = NULL;
12749 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012750 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012751
12752 xmlParseDocument(ctxt);
12753 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12754 else {
12755 ret = NULL;
12756 xmlFreeDoc(ctxt->myDoc);
12757 ctxt->myDoc = NULL;
12758 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012759 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012760 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012761 xmlFreeParserCtxt(ctxt);
12762
12763 return(ret);
12764}
12765
12766/**
12767 * xmlParseDoc:
12768 * @cur: a pointer to an array of xmlChar
12769 *
12770 * parse an XML in-memory document and build a tree.
12771 *
12772 * Returns the resulting document tree
12773 */
12774
12775xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012776xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012777 return(xmlSAXParseDoc(NULL, cur, 0));
12778}
Daniel Veillard81273902003-09-30 00:43:48 +000012779#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012780
Daniel Veillard81273902003-09-30 00:43:48 +000012781#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012782/************************************************************************
12783 * *
12784 * Specific function to keep track of entities references *
12785 * and used by the XSLT debugger *
12786 * *
12787 ************************************************************************/
12788
12789static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12790
12791/**
12792 * xmlAddEntityReference:
12793 * @ent : A valid entity
12794 * @firstNode : A valid first node for children of entity
12795 * @lastNode : A valid last node of children entity
12796 *
12797 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12798 */
12799static void
12800xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12801 xmlNodePtr lastNode)
12802{
12803 if (xmlEntityRefFunc != NULL) {
12804 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12805 }
12806}
12807
12808
12809/**
12810 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012811 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012812 *
12813 * Set the function to call call back when a xml reference has been made
12814 */
12815void
12816xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12817{
12818 xmlEntityRefFunc = func;
12819}
Daniel Veillard81273902003-09-30 00:43:48 +000012820#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012821
12822/************************************************************************
12823 * *
12824 * Miscellaneous *
12825 * *
12826 ************************************************************************/
12827
12828#ifdef LIBXML_XPATH_ENABLED
12829#include <libxml/xpath.h>
12830#endif
12831
Daniel Veillardffa3c742005-07-21 13:24:09 +000012832extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012833static int xmlParserInitialized = 0;
12834
12835/**
12836 * xmlInitParser:
12837 *
12838 * Initialization function for the XML parser.
12839 * This is not reentrant. Call once before processing in case of
12840 * use in multithreaded programs.
12841 */
12842
12843void
12844xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012845 if (xmlParserInitialized != 0)
12846 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012847
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012848#ifdef LIBXML_THREAD_ENABLED
12849 __xmlGlobalInitMutexLock();
12850 if (xmlParserInitialized == 0) {
12851#endif
12852 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12853 (xmlGenericError == NULL))
12854 initGenericErrorDefaultFunc(NULL);
12855 xmlInitGlobals();
12856 xmlInitThreads();
12857 xmlInitMemory();
12858 xmlInitCharEncodingHandlers();
12859 xmlDefaultSAXHandlerInit();
12860 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012861#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012862 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012863#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012864#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012865 htmlInitAutoClose();
12866 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012867#endif
12868#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012869 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012870#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012871 xmlParserInitialized = 1;
12872#ifdef LIBXML_THREAD_ENABLED
12873 }
12874 __xmlGlobalInitMutexUnlock();
12875#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012876}
12877
12878/**
12879 * xmlCleanupParser:
12880 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012881 * Cleanup function for the XML library. It tries to reclaim all
12882 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012883 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012884 * function should not prevent reusing the library but one should
12885 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012886 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012887 */
12888
12889void
12890xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012891 if (!xmlParserInitialized)
12892 return;
12893
Owen Taylor3473f882001-02-23 17:55:21 +000012894 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012895#ifdef LIBXML_CATALOG_ENABLED
12896 xmlCatalogCleanup();
12897#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012898 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012899 xmlCleanupInputCallbacks();
12900#ifdef LIBXML_OUTPUT_ENABLED
12901 xmlCleanupOutputCallbacks();
12902#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012903#ifdef LIBXML_SCHEMAS_ENABLED
12904 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012905 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012906#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012907 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012908 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012909 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012910 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012911 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012912}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012913
12914/************************************************************************
12915 * *
12916 * New set (2.6.0) of simpler and more flexible APIs *
12917 * *
12918 ************************************************************************/
12919
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a string is always freed
 * when "dict" itself is NULL.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as one statement and can be used safely in an unbraced if/else.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12931
12932/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012933 * xmlCtxtReset:
12934 * @ctxt: an XML parser context
12935 *
12936 * Reset a parser context
12937 */
12938void
12939xmlCtxtReset(xmlParserCtxtPtr ctxt)
12940{
12941 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012942 xmlDictPtr dict;
12943
12944 if (ctxt == NULL)
12945 return;
12946
12947 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012948
12949 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12950 xmlFreeInputStream(input);
12951 }
12952 ctxt->inputNr = 0;
12953 ctxt->input = NULL;
12954
12955 ctxt->spaceNr = 0;
12956 ctxt->spaceTab[0] = -1;
12957 ctxt->space = &ctxt->spaceTab[0];
12958
12959
12960 ctxt->nodeNr = 0;
12961 ctxt->node = NULL;
12962
12963 ctxt->nameNr = 0;
12964 ctxt->name = NULL;
12965
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012966 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012967 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012968 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012969 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012970 DICT_FREE(ctxt->directory);
12971 ctxt->directory = NULL;
12972 DICT_FREE(ctxt->extSubURI);
12973 ctxt->extSubURI = NULL;
12974 DICT_FREE(ctxt->extSubSystem);
12975 ctxt->extSubSystem = NULL;
12976 if (ctxt->myDoc != NULL)
12977 xmlFreeDoc(ctxt->myDoc);
12978 ctxt->myDoc = NULL;
12979
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012980 ctxt->standalone = -1;
12981 ctxt->hasExternalSubset = 0;
12982 ctxt->hasPErefs = 0;
12983 ctxt->html = 0;
12984 ctxt->external = 0;
12985 ctxt->instate = XML_PARSER_START;
12986 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012987
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012988 ctxt->wellFormed = 1;
12989 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012990 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012991 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012992#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012993 ctxt->vctxt.userData = ctxt;
12994 ctxt->vctxt.error = xmlParserValidityError;
12995 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012996#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012997 ctxt->record_info = 0;
12998 ctxt->nbChars = 0;
12999 ctxt->checkIndex = 0;
13000 ctxt->inSubset = 0;
13001 ctxt->errNo = XML_ERR_OK;
13002 ctxt->depth = 0;
13003 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13004 ctxt->catalogs = NULL;
13005 xmlInitNodeInfoSeq(&ctxt->node_seq);
13006
13007 if (ctxt->attsDefault != NULL) {
13008 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13009 ctxt->attsDefault = NULL;
13010 }
13011 if (ctxt->attsSpecial != NULL) {
13012 xmlHashFree(ctxt->attsSpecial, NULL);
13013 ctxt->attsSpecial = NULL;
13014 }
13015
Daniel Veillard4432df22003-09-28 18:58:27 +000013016#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013017 if (ctxt->catalogs != NULL)
13018 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013019#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013020 if (ctxt->lastError.code != XML_ERR_OK)
13021 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013022}
13023
13024/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013025 * xmlCtxtResetPush:
13026 * @ctxt: an XML parser context
13027 * @chunk: a pointer to an array of chars
13028 * @size: number of chars in the array
13029 * @filename: an optional file name or URI
13030 * @encoding: the document encoding, or NULL
13031 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013032 * Reset a push parser context
13033 *
13034 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013035 */
13036int
13037xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13038 int size, const char *filename, const char *encoding)
13039{
13040 xmlParserInputPtr inputStream;
13041 xmlParserInputBufferPtr buf;
13042 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13043
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013044 if (ctxt == NULL)
13045 return(1);
13046
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013047 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13048 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13049
13050 buf = xmlAllocParserInputBuffer(enc);
13051 if (buf == NULL)
13052 return(1);
13053
13054 if (ctxt == NULL) {
13055 xmlFreeParserInputBuffer(buf);
13056 return(1);
13057 }
13058
13059 xmlCtxtReset(ctxt);
13060
13061 if (ctxt->pushTab == NULL) {
13062 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13063 sizeof(xmlChar *));
13064 if (ctxt->pushTab == NULL) {
13065 xmlErrMemory(ctxt, NULL);
13066 xmlFreeParserInputBuffer(buf);
13067 return(1);
13068 }
13069 }
13070
13071 if (filename == NULL) {
13072 ctxt->directory = NULL;
13073 } else {
13074 ctxt->directory = xmlParserGetDirectory(filename);
13075 }
13076
13077 inputStream = xmlNewInputStream(ctxt);
13078 if (inputStream == NULL) {
13079 xmlFreeParserInputBuffer(buf);
13080 return(1);
13081 }
13082
13083 if (filename == NULL)
13084 inputStream->filename = NULL;
13085 else
13086 inputStream->filename = (char *)
13087 xmlCanonicPath((const xmlChar *) filename);
13088 inputStream->buf = buf;
13089 inputStream->base = inputStream->buf->buffer->content;
13090 inputStream->cur = inputStream->buf->buffer->content;
13091 inputStream->end =
13092 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13093
13094 inputPush(ctxt, inputStream);
13095
13096 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13097 (ctxt->input->buf != NULL)) {
13098 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13099 int cur = ctxt->input->cur - ctxt->input->base;
13100
13101 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13102
13103 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13104 ctxt->input->cur = ctxt->input->base + cur;
13105 ctxt->input->end =
13106 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13107 use];
13108#ifdef DEBUG_PUSH
13109 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13110#endif
13111 }
13112
13113 if (encoding != NULL) {
13114 xmlCharEncodingHandlerPtr hdlr;
13115
13116 hdlr = xmlFindCharEncodingHandler(encoding);
13117 if (hdlr != NULL) {
13118 xmlSwitchToEncoding(ctxt, hdlr);
13119 } else {
13120 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13121 "Unsupported encoding %s\n", BAD_CAST encoding);
13122 }
13123 } else if (enc != XML_CHAR_ENCODING_NONE) {
13124 xmlSwitchEncoding(ctxt, enc);
13125 }
13126
13127 return(0);
13128}
13129
13130/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013131 * xmlCtxtUseOptions:
13132 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013133 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013134 *
13135 * Applies the options to the parser context
13136 *
13137 * Returns 0 in case of success, the set of unknown or unimplemented options
13138 * in case of error.
13139 */
13140int
13141xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13142{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013143 if (ctxt == NULL)
13144 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013145 if (options & XML_PARSE_RECOVER) {
13146 ctxt->recovery = 1;
13147 options -= XML_PARSE_RECOVER;
13148 } else
13149 ctxt->recovery = 0;
13150 if (options & XML_PARSE_DTDLOAD) {
13151 ctxt->loadsubset = XML_DETECT_IDS;
13152 options -= XML_PARSE_DTDLOAD;
13153 } else
13154 ctxt->loadsubset = 0;
13155 if (options & XML_PARSE_DTDATTR) {
13156 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13157 options -= XML_PARSE_DTDATTR;
13158 }
13159 if (options & XML_PARSE_NOENT) {
13160 ctxt->replaceEntities = 1;
13161 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13162 options -= XML_PARSE_NOENT;
13163 } else
13164 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013165 if (options & XML_PARSE_PEDANTIC) {
13166 ctxt->pedantic = 1;
13167 options -= XML_PARSE_PEDANTIC;
13168 } else
13169 ctxt->pedantic = 0;
13170 if (options & XML_PARSE_NOBLANKS) {
13171 ctxt->keepBlanks = 0;
13172 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13173 options -= XML_PARSE_NOBLANKS;
13174 } else
13175 ctxt->keepBlanks = 1;
13176 if (options & XML_PARSE_DTDVALID) {
13177 ctxt->validate = 1;
13178 if (options & XML_PARSE_NOWARNING)
13179 ctxt->vctxt.warning = NULL;
13180 if (options & XML_PARSE_NOERROR)
13181 ctxt->vctxt.error = NULL;
13182 options -= XML_PARSE_DTDVALID;
13183 } else
13184 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013185 if (options & XML_PARSE_NOWARNING) {
13186 ctxt->sax->warning = NULL;
13187 options -= XML_PARSE_NOWARNING;
13188 }
13189 if (options & XML_PARSE_NOERROR) {
13190 ctxt->sax->error = NULL;
13191 ctxt->sax->fatalError = NULL;
13192 options -= XML_PARSE_NOERROR;
13193 }
Daniel Veillard81273902003-09-30 00:43:48 +000013194#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013195 if (options & XML_PARSE_SAX1) {
13196 ctxt->sax->startElement = xmlSAX2StartElement;
13197 ctxt->sax->endElement = xmlSAX2EndElement;
13198 ctxt->sax->startElementNs = NULL;
13199 ctxt->sax->endElementNs = NULL;
13200 ctxt->sax->initialized = 1;
13201 options -= XML_PARSE_SAX1;
13202 }
Daniel Veillard81273902003-09-30 00:43:48 +000013203#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013204 if (options & XML_PARSE_NODICT) {
13205 ctxt->dictNames = 0;
13206 options -= XML_PARSE_NODICT;
13207 } else {
13208 ctxt->dictNames = 1;
13209 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013210 if (options & XML_PARSE_NOCDATA) {
13211 ctxt->sax->cdataBlock = NULL;
13212 options -= XML_PARSE_NOCDATA;
13213 }
13214 if (options & XML_PARSE_NSCLEAN) {
13215 ctxt->options |= XML_PARSE_NSCLEAN;
13216 options -= XML_PARSE_NSCLEAN;
13217 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013218 if (options & XML_PARSE_NONET) {
13219 ctxt->options |= XML_PARSE_NONET;
13220 options -= XML_PARSE_NONET;
13221 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013222 if (options & XML_PARSE_COMPACT) {
13223 ctxt->options |= XML_PARSE_COMPACT;
13224 options -= XML_PARSE_COMPACT;
13225 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013226 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013227 return (options);
13228}
13229
13230/**
13231 * xmlDoRead:
13232 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013233 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013234 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013235 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013236 * @reuse: keep the context for reuse
13237 *
13238 * Common front-end for the xmlRead functions
13239 *
13240 * Returns the resulting document tree or NULL
13241 */
13242static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013243xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13244 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013245{
13246 xmlDocPtr ret;
13247
13248 xmlCtxtUseOptions(ctxt, options);
13249 if (encoding != NULL) {
13250 xmlCharEncodingHandlerPtr hdlr;
13251
13252 hdlr = xmlFindCharEncodingHandler(encoding);
13253 if (hdlr != NULL)
13254 xmlSwitchToEncoding(ctxt, hdlr);
13255 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013256 if ((URL != NULL) && (ctxt->input != NULL) &&
13257 (ctxt->input->filename == NULL))
13258 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013259 xmlParseDocument(ctxt);
13260 if ((ctxt->wellFormed) || ctxt->recovery)
13261 ret = ctxt->myDoc;
13262 else {
13263 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013264 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013265 xmlFreeDoc(ctxt->myDoc);
13266 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013267 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013268 ctxt->myDoc = NULL;
13269 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013270 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013271 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013272
13273 return (ret);
13274}
13275
13276/**
13277 * xmlReadDoc:
13278 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013279 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013281 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013282 *
13283 * parse an XML in-memory document and build a tree.
13284 *
13285 * Returns the resulting document tree
13286 */
13287xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013288xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013289{
13290 xmlParserCtxtPtr ctxt;
13291
13292 if (cur == NULL)
13293 return (NULL);
13294
13295 ctxt = xmlCreateDocParserCtxt(cur);
13296 if (ctxt == NULL)
13297 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013298 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013299}
13300
13301/**
13302 * xmlReadFile:
13303 * @filename: a file or URL
13304 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013305 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013306 *
13307 * parse an XML file from the filesystem or the network.
13308 *
13309 * Returns the resulting document tree
13310 */
13311xmlDocPtr
13312xmlReadFile(const char *filename, const char *encoding, int options)
13313{
13314 xmlParserCtxtPtr ctxt;
13315
Daniel Veillard61b93382003-11-03 14:28:31 +000013316 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013317 if (ctxt == NULL)
13318 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013319 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013320}
13321
13322/**
13323 * xmlReadMemory:
13324 * @buffer: a pointer to a char array
13325 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013326 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013327 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013328 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013329 *
13330 * parse an XML in-memory document and build a tree.
13331 *
13332 * Returns the resulting document tree
13333 */
13334xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013335xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013336{
13337 xmlParserCtxtPtr ctxt;
13338
13339 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13340 if (ctxt == NULL)
13341 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013342 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013343}
13344
13345/**
13346 * xmlReadFd:
13347 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013348 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013349 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013350 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013351 *
13352 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013353 * NOTE that the file descriptor will not be closed when the
13354 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013355 *
13356 * Returns the resulting document tree
13357 */
13358xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013359xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013360{
13361 xmlParserCtxtPtr ctxt;
13362 xmlParserInputBufferPtr input;
13363 xmlParserInputPtr stream;
13364
13365 if (fd < 0)
13366 return (NULL);
13367
13368 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13369 if (input == NULL)
13370 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013371 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013372 ctxt = xmlNewParserCtxt();
13373 if (ctxt == NULL) {
13374 xmlFreeParserInputBuffer(input);
13375 return (NULL);
13376 }
13377 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13378 if (stream == NULL) {
13379 xmlFreeParserInputBuffer(input);
13380 xmlFreeParserCtxt(ctxt);
13381 return (NULL);
13382 }
13383 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013384 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013385}
13386
13387/**
13388 * xmlReadIO:
13389 * @ioread: an I/O read function
13390 * @ioclose: an I/O close function
13391 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013392 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013393 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013394 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013395 *
13396 * parse an XML document from I/O functions and source and build a tree.
13397 *
13398 * Returns the resulting document tree
13399 */
13400xmlDocPtr
13401xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013402 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013403{
13404 xmlParserCtxtPtr ctxt;
13405 xmlParserInputBufferPtr input;
13406 xmlParserInputPtr stream;
13407
13408 if (ioread == NULL)
13409 return (NULL);
13410
13411 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13412 XML_CHAR_ENCODING_NONE);
13413 if (input == NULL)
13414 return (NULL);
13415 ctxt = xmlNewParserCtxt();
13416 if (ctxt == NULL) {
13417 xmlFreeParserInputBuffer(input);
13418 return (NULL);
13419 }
13420 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13421 if (stream == NULL) {
13422 xmlFreeParserInputBuffer(input);
13423 xmlFreeParserCtxt(ctxt);
13424 return (NULL);
13425 }
13426 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013427 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013428}
13429
13430/**
13431 * xmlCtxtReadDoc:
13432 * @ctxt: an XML parser context
13433 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013434 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013435 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013436 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013437 *
13438 * parse an XML in-memory document and build a tree.
13439 * This reuses the existing @ctxt parser context
13440 *
13441 * Returns the resulting document tree
13442 */
13443xmlDocPtr
13444xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013445 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013446{
13447 xmlParserInputPtr stream;
13448
13449 if (cur == NULL)
13450 return (NULL);
13451 if (ctxt == NULL)
13452 return (NULL);
13453
13454 xmlCtxtReset(ctxt);
13455
13456 stream = xmlNewStringInputStream(ctxt, cur);
13457 if (stream == NULL) {
13458 return (NULL);
13459 }
13460 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013461 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013462}
13463
13464/**
13465 * xmlCtxtReadFile:
13466 * @ctxt: an XML parser context
13467 * @filename: a file or URL
13468 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013469 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013470 *
13471 * parse an XML file from the filesystem or the network.
13472 * This reuses the existing @ctxt parser context
13473 *
13474 * Returns the resulting document tree
13475 */
13476xmlDocPtr
13477xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13478 const char *encoding, int options)
13479{
13480 xmlParserInputPtr stream;
13481
13482 if (filename == NULL)
13483 return (NULL);
13484 if (ctxt == NULL)
13485 return (NULL);
13486
13487 xmlCtxtReset(ctxt);
13488
Daniel Veillard29614c72004-11-26 10:47:26 +000013489 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013490 if (stream == NULL) {
13491 return (NULL);
13492 }
13493 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013494 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013495}
13496
13497/**
13498 * xmlCtxtReadMemory:
13499 * @ctxt: an XML parser context
13500 * @buffer: a pointer to a char array
13501 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013502 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013503 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013504 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013505 *
13506 * parse an XML in-memory document and build a tree.
13507 * This reuses the existing @ctxt parser context
13508 *
13509 * Returns the resulting document tree
13510 */
13511xmlDocPtr
13512xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013513 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013514{
13515 xmlParserInputBufferPtr input;
13516 xmlParserInputPtr stream;
13517
13518 if (ctxt == NULL)
13519 return (NULL);
13520 if (buffer == NULL)
13521 return (NULL);
13522
13523 xmlCtxtReset(ctxt);
13524
13525 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13526 if (input == NULL) {
13527 return(NULL);
13528 }
13529
13530 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13531 if (stream == NULL) {
13532 xmlFreeParserInputBuffer(input);
13533 return(NULL);
13534 }
13535
13536 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013537 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013538}
13539
13540/**
13541 * xmlCtxtReadFd:
13542 * @ctxt: an XML parser context
13543 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013544 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013545 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013546 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013547 *
13548 * parse an XML from a file descriptor and build a tree.
13549 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013550 * NOTE that the file descriptor will not be closed when the
13551 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013552 *
13553 * Returns the resulting document tree
13554 */
13555xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013556xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13557 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013558{
13559 xmlParserInputBufferPtr input;
13560 xmlParserInputPtr stream;
13561
13562 if (fd < 0)
13563 return (NULL);
13564 if (ctxt == NULL)
13565 return (NULL);
13566
13567 xmlCtxtReset(ctxt);
13568
13569
13570 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13571 if (input == NULL)
13572 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013573 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013574 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13575 if (stream == NULL) {
13576 xmlFreeParserInputBuffer(input);
13577 return (NULL);
13578 }
13579 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013580 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013581}
13582
13583/**
13584 * xmlCtxtReadIO:
13585 * @ctxt: an XML parser context
13586 * @ioread: an I/O read function
13587 * @ioclose: an I/O close function
13588 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013589 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013590 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013591 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013592 *
13593 * parse an XML document from I/O functions and source and build a tree.
13594 * This reuses the existing @ctxt parser context
13595 *
13596 * Returns the resulting document tree
13597 */
13598xmlDocPtr
13599xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13600 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013601 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013602 const char *encoding, int options)
13603{
13604 xmlParserInputBufferPtr input;
13605 xmlParserInputPtr stream;
13606
13607 if (ioread == NULL)
13608 return (NULL);
13609 if (ctxt == NULL)
13610 return (NULL);
13611
13612 xmlCtxtReset(ctxt);
13613
13614 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13615 XML_CHAR_ENCODING_NONE);
13616 if (input == NULL)
13617 return (NULL);
13618 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13619 if (stream == NULL) {
13620 xmlFreeParserInputBuffer(input);
13621 return (NULL);
13622 }
13623 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013624 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013625}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013626
13627#define bottom_parser
13628#include "elfgcchack.h"