blob: 817d8d82a9049f25075b965e173510d634ce035b [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary ceiling on the depth of the XML documents that we accept
 * to process. It is a safety boundary, not a limitation of the parser
 * itself.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* NOTE(review): presumably selects the SAX2 code paths later in the file - confirm */
#define SAX2 1

/* Buffer sizes used by the parser (large and regular variants) */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string for SAX compatibility mode documents */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * XML-prefixed processing instruction targets allowed by the W3C specs.
 * The table is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
/************************************************************************
 *                                                                      *
 *              Some factorized error routines                          *
 *                                                                      *
 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000150 if (ctxt != NULL)
151 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000152 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000153 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000154 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
155 (const char *) localname, NULL, NULL, 0, 0,
156 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000157 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000158 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000159 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
160 (const char *) prefix, (const char *) localname,
161 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
162 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000163 if (ctxt != NULL) {
164 ctxt->wellFormed = 0;
165 if (ctxt->recovery == 0)
166 ctxt->disableSAX = 1;
167 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000168}
169
170/**
171 * xmlFatalErr:
172 * @ctxt: an XML parser context
173 * @error: the error number
174 * @extra: extra information string
175 *
176 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
177 */
178static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000179xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000180{
181 const char *errmsg;
182
Daniel Veillard157fee02003-10-31 10:36:03 +0000183 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
184 (ctxt->instate == XML_PARSER_EOF))
185 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000186 switch (error) {
187 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid hexadecimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid decimal value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "CharRef: invalid value\n";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "internal error";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference at end of document\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in prolog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference in epilog\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: no name\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "PEReference: expecting ';'\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "Detected an entity reference loop\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "EntityValue: \" or ' expected\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "PEReferences forbidden in internal subset\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "EntityValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "AttValue: \" or ' expected\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "Unescaped '<' not allowed in attributes values\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "SystemLiteral \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Unfinished System or Public ID \" or ' expected\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "Sequence ']]>' not allowed in content\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "PUBLIC, the Public Identifier is missing\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "Comment must not contain '--' (double-hyphen)\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "xmlParsePI : no target name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "Invalid PI name\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "NOTATION: Name expected here\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "'>' required to close NOTATION declaration\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Entity value required\n";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "Fragment not allowed";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "'(' required to start ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "NmToken expected in ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "')' required to finish ATTLIST enumeration\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : Name or '(' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
288 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000290 errmsg =
291 "PEReference: forbidden within markup decl in internal subset\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "expected '>'\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "XML conditional section '[' expected\n";
298 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000299 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300 errmsg = "Content error in the external subset\n";
301 break;
302 case XML_ERR_CONDSEC_INVALID_KEYWORD:
303 errmsg =
304 "conditional section INCLUDE or IGNORE keyword expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "XML conditional section not closed\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "Text declaration '<?xml' required\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "parsing XML declaration: '?>' expected\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "external parsed entities cannot be standalone\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityRef: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "DOCTYPE improperly terminated\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EndTag: '</' not found\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "expected '='\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not closed expecting \" or '\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "String not started expecting ' or \"\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Invalid XML encoding name\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "standalone accepts only 'yes' or 'no'\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Document is empty\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Extra content at the end of the document\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "chunk is not well balanced\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "extra content at the end of well balanced chunk\n";
353 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000354 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Malformed declaration expecting version\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 case:
359 errmsg = "\n";
360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 default:
363 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000365 if (ctxt != NULL)
366 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000367 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
369 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000370 if (ctxt != NULL) {
371 ctxt->wellFormed = 0;
372 if (ctxt->recovery == 0)
373 ctxt->disableSAX = 1;
374 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375}
376
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000377/**
378 * xmlFatalErrMsg:
379 * @ctxt: an XML parser context
380 * @error: the error number
381 * @msg: the error message
382 *
383 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
384 */
385static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
387 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000388{
Daniel Veillard157fee02003-10-31 10:36:03 +0000389 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
390 (ctxt->instate == XML_PARSER_EOF))
391 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000392 if (ctxt != NULL)
393 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000394 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000396 if (ctxt != NULL) {
397 ctxt->wellFormed = 0;
398 if (ctxt->recovery == 0)
399 ctxt->disableSAX = 1;
400 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000401}
402
403/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000404 * xmlWarningMsg:
405 * @ctxt: an XML parser context
406 * @error: the error number
407 * @msg: the error message
408 * @str1: extra data
409 * @str2: extra data
410 *
411 * Handle a warning.
412 */
413static void
414xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
415 const char *msg, const xmlChar *str1, const xmlChar *str2)
416{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000417 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000418
Daniel Veillard157fee02003-10-31 10:36:03 +0000419 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
420 (ctxt->instate == XML_PARSER_EOF))
421 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000422 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
423 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 schannel = ctxt->sax->serror;
425 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000426 (ctxt->sax) ? ctxt->sax->warning : NULL,
427 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000428 ctxt, NULL, XML_FROM_PARSER, error,
429 XML_ERR_WARNING, NULL, 0,
430 (const char *) str1, (const char *) str2, NULL, 0, 0,
431 msg, (const char *) str1, (const char *) str2);
432}
433
434/**
435 * xmlValidityError:
436 * @ctxt: an XML parser context
437 * @error: the error number
438 * @msg: the error message
439 * @str1: extra data
440 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000441 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 */
443static void
444xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
445 const char *msg, const xmlChar *str1)
446{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000447 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000448
449 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
450 (ctxt->instate == XML_PARSER_EOF))
451 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000452 if (ctxt != NULL) {
453 ctxt->errNo = error;
454 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
455 schannel = ctxt->sax->serror;
456 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000457 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000458 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000459 ctxt, NULL, XML_FROM_DTD, error,
460 XML_ERR_ERROR, NULL, 0, (const char *) str1,
461 NULL, NULL, 0, 0,
462 msg, (const char *) str1);
Daniel Veillard30e76072006-03-09 14:13:55 +0000463 if (ctxt != NULL) {
464 ctxt->valid = 0;
465 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466}
467
468/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000469 * xmlFatalErrMsgInt:
470 * @ctxt: an XML parser context
471 * @error: the error number
472 * @msg: the error message
473 * @val: an integer value
474 *
475 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
476 */
477static void
478xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000480{
Daniel Veillard157fee02003-10-31 10:36:03 +0000481 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
482 (ctxt->instate == XML_PARSER_EOF))
483 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000484 if (ctxt != NULL)
485 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000486 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
488 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000489 if (ctxt != NULL) {
490 ctxt->wellFormed = 0;
491 if (ctxt->recovery == 0)
492 ctxt->disableSAX = 1;
493 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000494}
495
496/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000497 * xmlFatalErrMsgStrIntStr:
498 * @ctxt: an XML parser context
499 * @error: the error number
500 * @msg: the error message
501 * @str1: an string info
502 * @val: an integer value
503 * @str2: an string info
504 *
505 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
506 */
507static void
508xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
509 const char *msg, const xmlChar *str1, int val,
510 const xmlChar *str2)
511{
Daniel Veillard157fee02003-10-31 10:36:03 +0000512 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
513 (ctxt->instate == XML_PARSER_EOF))
514 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL)
516 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000517 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000518 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
519 NULL, 0, (const char *) str1, (const char *) str2,
520 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL) {
522 ctxt->wellFormed = 0;
523 if (ctxt->recovery == 0)
524 ctxt->disableSAX = 1;
525 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000526}
527
528/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 * xmlFatalErrMsgStr:
530 * @ctxt: an XML parser context
531 * @error: the error number
532 * @msg: the error message
533 * @val: a string value
534 *
535 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
536 */
537static void
538xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000540{
Daniel Veillard157fee02003-10-31 10:36:03 +0000541 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
542 (ctxt->instate == XML_PARSER_EOF))
543 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000544 if (ctxt != NULL)
545 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 XML_FROM_PARSER, error, XML_ERR_FATAL,
548 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
549 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000555}
556
557/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000558 * xmlErrMsgStr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 * @val: a string value
563 *
564 * Handle a non fatal parser error
565 */
566static void
567xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg, const xmlChar * val)
569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000576 XML_FROM_PARSER, error, XML_ERR_ERROR,
577 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
578 val);
579}
580
581/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 * xmlNsErr:
583 * @ctxt: an XML parser context
584 * @error: the error number
585 * @msg: the message
586 * @info1: extra information string
587 * @info2: extra information string
588 *
589 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
590 */
591static void
592xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000594 const xmlChar * info1, const xmlChar * info2,
595 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596{
Daniel Veillard157fee02003-10-31 10:36:03 +0000597 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598 (ctxt->instate == XML_PARSER_EOF))
599 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000600 if (ctxt != NULL)
601 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000602 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 XML_ERR_ERROR, NULL, 0, (const char *) info1,
604 (const char *) info2, (const char *) info3, 0, 0, msg,
605 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000606 if (ctxt != NULL)
607 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000608}
609
/************************************************************************
 *                                                                      *
 *              Library wide options                                    *
 *                                                                      *
 ************************************************************************/
615
/**
 * xmlHasFeature:
 * @feature: the feature to be examined
 *
 * Examines if the library has been compiled with a given feature.
 *
 * Returns a non-zero value if the feature exists; returns zero (0) if
 * the feature does not exist or an unknown feature is requested.
 */
626int
627xmlHasFeature(xmlFeature feature)
628{
629 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000630 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000631#ifdef LIBXML_THREAD_ENABLED
632 return(1);
633#else
634 return(0);
635#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000636 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000637#ifdef LIBXML_TREE_ENABLED
638 return(1);
639#else
640 return(0);
641#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000642 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000643#ifdef LIBXML_OUTPUT_ENABLED
644 return(1);
645#else
646 return(0);
647#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000648 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000649#ifdef LIBXML_PUSH_ENABLED
650 return(1);
651#else
652 return(0);
653#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000654 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000655#ifdef LIBXML_READER_ENABLED
656 return(1);
657#else
658 return(0);
659#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000660 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000661#ifdef LIBXML_PATTERN_ENABLED
662 return(1);
663#else
664 return(0);
665#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000666 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000667#ifdef LIBXML_WRITER_ENABLED
668 return(1);
669#else
670 return(0);
671#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000672 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000673#ifdef LIBXML_SAX1_ENABLED
674 return(1);
675#else
676 return(0);
677#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000678 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000679#ifdef LIBXML_FTP_ENABLED
680 return(1);
681#else
682 return(0);
683#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000684 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000685#ifdef LIBXML_HTTP_ENABLED
686 return(1);
687#else
688 return(0);
689#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000690 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000691#ifdef LIBXML_VALID_ENABLED
692 return(1);
693#else
694 return(0);
695#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000696 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000697#ifdef LIBXML_HTML_ENABLED
698 return(1);
699#else
700 return(0);
701#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000702 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000703#ifdef LIBXML_LEGACY_ENABLED
704 return(1);
705#else
706 return(0);
707#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000708 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000709#ifdef LIBXML_C14N_ENABLED
710 return(1);
711#else
712 return(0);
713#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000714 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000715#ifdef LIBXML_CATALOG_ENABLED
716 return(1);
717#else
718 return(0);
719#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000720 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000721#ifdef LIBXML_XPATH_ENABLED
722 return(1);
723#else
724 return(0);
725#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000726 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000727#ifdef LIBXML_XPTR_ENABLED
728 return(1);
729#else
730 return(0);
731#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000732 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000733#ifdef LIBXML_XINCLUDE_ENABLED
734 return(1);
735#else
736 return(0);
737#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000738 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000739#ifdef LIBXML_ICONV_ENABLED
740 return(1);
741#else
742 return(0);
743#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000744 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000745#ifdef LIBXML_ISO8859X_ENABLED
746 return(1);
747#else
748 return(0);
749#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000750 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000751#ifdef LIBXML_UNICODE_ENABLED
752 return(1);
753#else
754 return(0);
755#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_REGEXP_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_AUTOMATA_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_EXPR_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_SCHEMAS_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_SCHEMATRON_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_MODULES_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_DEBUG_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef DEBUG_MEMORY_LOCATION
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_DEBUG_RUNTIME
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000810 case XML_WITH_ZLIB:
811#ifdef LIBXML_ZLIB_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816 default:
817 break;
818 }
819 return(0);
820}
821
822/************************************************************************
823 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000824 * SAX2 defaulted attributes handling *
825 * *
826 ************************************************************************/
827
828/**
829 * xmlDetectSAX2:
830 * @ctxt: an XML parser context
831 *
832 * Do the SAX2 detection and specific intialization
833 */
834static void
835xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
836 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000837#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000838 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
839 ((ctxt->sax->startElementNs != NULL) ||
840 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000841#else
842 ctxt->sax2 = 1;
843#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000844
845 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
846 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
847 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000848 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
849 (ctxt->str_xml_ns == NULL)) {
850 xmlErrMemory(ctxt, NULL);
851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000852}
853
Daniel Veillarde57ec792003-09-10 10:50:59 +0000854typedef struct _xmlDefAttrs xmlDefAttrs;
855typedef xmlDefAttrs *xmlDefAttrsPtr;
856struct _xmlDefAttrs {
857 int nbAttrs; /* number of defaulted attributes on that element */
858 int maxAttrs; /* the size of the array */
859 const xmlChar *values[4]; /* array of localname/prefix/values */
860};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000861
862/**
863 * xmlAddDefAttrs:
864 * @ctxt: an XML parser context
865 * @fullname: the element fullname
866 * @fullattr: the attribute fullname
867 * @value: the attribute value
868 *
869 * Add a defaulted attribute for an element
870 */
871static void
872xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
873 const xmlChar *fullname,
874 const xmlChar *fullattr,
875 const xmlChar *value) {
876 xmlDefAttrsPtr defaults;
877 int len;
878 const xmlChar *name;
879 const xmlChar *prefix;
880
881 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000882 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000883 if (ctxt->attsDefault == NULL)
884 goto mem_error;
885 }
886
887 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000888 * split the element name into prefix:localname , the string found
889 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000890 */
891 name = xmlSplitQName3(fullname, &len);
892 if (name == NULL) {
893 name = xmlDictLookup(ctxt->dict, fullname, -1);
894 prefix = NULL;
895 } else {
896 name = xmlDictLookup(ctxt->dict, name, -1);
897 prefix = xmlDictLookup(ctxt->dict, fullname, len);
898 }
899
900 /*
901 * make sure there is some storage
902 */
903 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
904 if (defaults == NULL) {
905 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000906 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000907 if (defaults == NULL)
908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000909 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000910 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
912 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000913 xmlDefAttrsPtr temp;
914
915 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000917 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000919 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 defaults->maxAttrs *= 2;
921 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
922 }
923
924 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000925 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000926 * are within the DTD and hen not associated to namespace names.
927 */
928 name = xmlSplitQName3(fullattr, &len);
929 if (name == NULL) {
930 name = xmlDictLookup(ctxt->dict, fullattr, -1);
931 prefix = NULL;
932 } else {
933 name = xmlDictLookup(ctxt->dict, name, -1);
934 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
935 }
936
937 defaults->values[4 * defaults->nbAttrs] = name;
938 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
939 /* intern the string and precompute the end */
940 len = xmlStrlen(value);
941 value = xmlDictLookup(ctxt->dict, value, len);
942 defaults->values[4 * defaults->nbAttrs + 2] = value;
943 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
944 defaults->nbAttrs++;
945
946 return;
947
948mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000949 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 return;
951}
952
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000953/**
954 * xmlAddSpecialAttr:
955 * @ctxt: an XML parser context
956 * @fullname: the element fullname
957 * @fullattr: the attribute fullname
958 * @type: the attribute type
959 *
960 * Register that this attribute is not CDATA
961 */
962static void
963xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
964 const xmlChar *fullname,
965 const xmlChar *fullattr,
966 int type)
967{
968 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000969 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000970 if (ctxt->attsSpecial == NULL)
971 goto mem_error;
972 }
973
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000974 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
975 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000976 return;
977
978mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000979 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000980 return;
981}
982
Daniel Veillard4432df22003-09-28 18:58:27 +0000983/**
984 * xmlCheckLanguageID:
985 * @lang: pointer to the string value
986 *
987 * Checks that the value conforms to the LanguageID production:
988 *
989 * NOTE: this is somewhat deprecated, those productions were removed from
990 * the XML Second edition.
991 *
992 * [33] LanguageID ::= Langcode ('-' Subcode)*
993 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
994 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
995 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
996 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
997 * [38] Subcode ::= ([a-z] | [A-Z])+
998 *
999 * Returns 1 if correct 0 otherwise
1000 **/
1001int
1002xmlCheckLanguageID(const xmlChar * lang)
1003{
1004 const xmlChar *cur = lang;
1005
1006 if (cur == NULL)
1007 return (0);
1008 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1009 ((cur[0] == 'I') && (cur[1] == '-'))) {
1010 /*
1011 * IANA code
1012 */
1013 cur += 2;
1014 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1015 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1016 cur++;
1017 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1018 ((cur[0] == 'X') && (cur[1] == '-'))) {
1019 /*
1020 * User code
1021 */
1022 cur += 2;
1023 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1024 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1025 cur++;
1026 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1027 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1028 /*
1029 * ISO639
1030 */
1031 cur++;
1032 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1033 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1034 cur++;
1035 else
1036 return (0);
1037 } else
1038 return (0);
1039 while (cur[0] != 0) { /* non input consuming */
1040 if (cur[0] != '-')
1041 return (0);
1042 cur++;
1043 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1044 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1045 cur++;
1046 else
1047 return (0);
1048 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1049 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1050 cur++;
1051 }
1052 return (1);
1053}
1054
Owen Taylor3473f882001-02-23 17:55:21 +00001055/************************************************************************
1056 * *
1057 * Parser stacks related functions and macros *
1058 * *
1059 ************************************************************************/
1060
1061xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1062 const xmlChar ** str);
1063
Daniel Veillard0fb18932003-09-07 09:14:37 +00001064#ifdef SAX2
1065/**
1066 * nsPush:
1067 * @ctxt: an XML parser context
1068 * @prefix: the namespace prefix or NULL
1069 * @URL: the namespace name
1070 *
1071 * Pushes a new parser namespace on top of the ns stack
1072 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001073 * Returns -1 in case of error, -2 if the namespace should be discarded
1074 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001075 */
1076static int
1077nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1078{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001079 if (ctxt->options & XML_PARSE_NSCLEAN) {
1080 int i;
1081 for (i = 0;i < ctxt->nsNr;i += 2) {
1082 if (ctxt->nsTab[i] == prefix) {
1083 /* in scope */
1084 if (ctxt->nsTab[i + 1] == URL)
1085 return(-2);
1086 /* out of scope keep it */
1087 break;
1088 }
1089 }
1090 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001091 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1092 ctxt->nsMax = 10;
1093 ctxt->nsNr = 0;
1094 ctxt->nsTab = (const xmlChar **)
1095 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1096 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001097 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001098 ctxt->nsMax = 0;
1099 return (-1);
1100 }
1101 } else if (ctxt->nsNr >= ctxt->nsMax) {
1102 ctxt->nsMax *= 2;
1103 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001104 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001105 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1106 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001107 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001108 ctxt->nsMax /= 2;
1109 return (-1);
1110 }
1111 }
1112 ctxt->nsTab[ctxt->nsNr++] = prefix;
1113 ctxt->nsTab[ctxt->nsNr++] = URL;
1114 return (ctxt->nsNr);
1115}
1116/**
1117 * nsPop:
1118 * @ctxt: an XML parser context
1119 * @nr: the number to pop
1120 *
1121 * Pops the top @nr parser prefix/namespace from the ns stack
1122 *
1123 * Returns the number of namespaces removed
1124 */
1125static int
1126nsPop(xmlParserCtxtPtr ctxt, int nr)
1127{
1128 int i;
1129
1130 if (ctxt->nsTab == NULL) return(0);
1131 if (ctxt->nsNr < nr) {
1132 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1133 nr = ctxt->nsNr;
1134 }
1135 if (ctxt->nsNr <= 0)
1136 return (0);
1137
1138 for (i = 0;i < nr;i++) {
1139 ctxt->nsNr--;
1140 ctxt->nsTab[ctxt->nsNr] = NULL;
1141 }
1142 return(nr);
1143}
1144#endif
1145
1146static int
1147xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1148 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001150 int maxatts;
1151
1152 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001154 atts = (const xmlChar **)
1155 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001157 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1159 if (attallocs == NULL) goto mem_error;
1160 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001161 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 } else if (nr + 5 > ctxt->maxatts) {
1163 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001164 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1165 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001167 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1169 (maxatts / 5) * sizeof(int));
1170 if (attallocs == NULL) goto mem_error;
1171 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001172 ctxt->maxatts = maxatts;
1173 }
1174 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001175mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001176 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001177 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001178}
1179
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001180/**
1181 * inputPush:
1182 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001183 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 *
1185 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001186 *
1187 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001188 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001189int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001190inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1191{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001192 if ((ctxt == NULL) || (value == NULL))
1193 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 if (ctxt->inputNr >= ctxt->inputMax) {
1195 ctxt->inputMax *= 2;
1196 ctxt->inputTab =
1197 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1198 ctxt->inputMax *
1199 sizeof(ctxt->inputTab[0]));
1200 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001201 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001202 return (0);
1203 }
1204 }
1205 ctxt->inputTab[ctxt->inputNr] = value;
1206 ctxt->input = value;
1207 return (ctxt->inputNr++);
1208}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001209/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001210 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 * @ctxt: an XML parser context
1212 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001214 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001215 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001216 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001217xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001218inputPop(xmlParserCtxtPtr ctxt)
1219{
1220 xmlParserInputPtr ret;
1221
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001222 if (ctxt == NULL)
1223 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001225 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001226 ctxt->inputNr--;
1227 if (ctxt->inputNr > 0)
1228 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1229 else
1230 ctxt->input = NULL;
1231 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001232 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001233 return (ret);
1234}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001235/**
1236 * nodePush:
1237 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001238 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001239 *
1240 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001241 *
1242 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001243 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001244int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001245nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1246{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001247 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001248 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001249 xmlNodePtr *tmp;
1250
1251 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1252 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001253 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001254 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001255 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 return (0);
1257 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001258 ctxt->nodeTab = tmp;
1259 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001260 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001261 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001262 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001263 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1264 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001265 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001266 return(0);
1267 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001268 ctxt->nodeTab[ctxt->nodeNr] = value;
1269 ctxt->node = value;
1270 return (ctxt->nodeNr++);
1271}
1272/**
1273 * nodePop:
1274 * @ctxt: an XML parser context
1275 *
1276 * Pops the top element node from the node stack
1277 *
1278 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001279 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001280xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001281nodePop(xmlParserCtxtPtr ctxt)
1282{
1283 xmlNodePtr ret;
1284
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001285 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001286 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001287 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001288 ctxt->nodeNr--;
1289 if (ctxt->nodeNr > 0)
1290 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1291 else
1292 ctxt->node = NULL;
1293 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001294 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001295 return (ret);
1296}
Daniel Veillarda2351322004-06-27 12:08:10 +00001297
1298#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001299/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 * nameNsPush:
1301 * @ctxt: an XML parser context
1302 * @value: the element name
1303 * @prefix: the element prefix
1304 * @URI: the element namespace name
1305 *
1306 * Pushes a new element name/prefix/URL on top of the name stack
1307 *
1308 * Returns -1 in case of error, the index in the stack otherwise
1309 */
1310static int
1311nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1312 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1313{
1314 if (ctxt->nameNr >= ctxt->nameMax) {
1315 const xmlChar * *tmp;
1316 void **tmp2;
1317 ctxt->nameMax *= 2;
1318 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1319 ctxt->nameMax *
1320 sizeof(ctxt->nameTab[0]));
1321 if (tmp == NULL) {
1322 ctxt->nameMax /= 2;
1323 goto mem_error;
1324 }
1325 ctxt->nameTab = tmp;
1326 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1327 ctxt->nameMax * 3 *
1328 sizeof(ctxt->pushTab[0]));
1329 if (tmp2 == NULL) {
1330 ctxt->nameMax /= 2;
1331 goto mem_error;
1332 }
1333 ctxt->pushTab = tmp2;
1334 }
1335 ctxt->nameTab[ctxt->nameNr] = value;
1336 ctxt->name = value;
1337 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1338 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001339 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001340 return (ctxt->nameNr++);
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001343 return (-1);
1344}
1345/**
1346 * nameNsPop:
1347 * @ctxt: an XML parser context
1348 *
1349 * Pops the top element/prefix/URI name from the name stack
1350 *
1351 * Returns the name just removed
1352 */
1353static const xmlChar *
1354nameNsPop(xmlParserCtxtPtr ctxt)
1355{
1356 const xmlChar *ret;
1357
1358 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001359 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001360 ctxt->nameNr--;
1361 if (ctxt->nameNr > 0)
1362 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1363 else
1364 ctxt->name = NULL;
1365 ret = ctxt->nameTab[ctxt->nameNr];
1366 ctxt->nameTab[ctxt->nameNr] = NULL;
1367 return (ret);
1368}
Daniel Veillarda2351322004-06-27 12:08:10 +00001369#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370
1371/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001372 * namePush:
1373 * @ctxt: an XML parser context
1374 * @value: the element name
1375 *
1376 * Pushes a new element name on top of the name stack
1377 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001378 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001381namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001382{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001383 if (ctxt == NULL) return (-1);
1384
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001386 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001388 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 ctxt->nameMax *
1390 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001391 if (tmp == NULL) {
1392 ctxt->nameMax /= 2;
1393 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001395 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001396 }
1397 ctxt->nameTab[ctxt->nameNr] = value;
1398 ctxt->name = value;
1399 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001400mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001402 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001403}
1404/**
1405 * namePop:
1406 * @ctxt: an XML parser context
1407 *
1408 * Pops the top element name from the name stack
1409 *
1410 * Returns the name just removed
1411 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001412const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413namePop(xmlParserCtxtPtr ctxt)
1414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001415 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001417 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1418 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001419 ctxt->nameNr--;
1420 if (ctxt->nameNr > 0)
1421 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1422 else
1423 ctxt->name = NULL;
1424 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001425 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001426 return (ret);
1427}
Owen Taylor3473f882001-02-23 17:55:21 +00001428
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001429static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001430 if (ctxt->spaceNr >= ctxt->spaceMax) {
1431 ctxt->spaceMax *= 2;
1432 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1433 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1434 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001436 return(0);
1437 }
1438 }
1439 ctxt->spaceTab[ctxt->spaceNr] = val;
1440 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1441 return(ctxt->spaceNr++);
1442}
1443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001444static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001445 int ret;
1446 if (ctxt->spaceNr <= 0) return(0);
1447 ctxt->spaceNr--;
1448 if (ctxt->spaceNr > 0)
1449 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1450 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001451 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001452 ret = ctxt->spaceTab[ctxt->spaceNr];
1453 ctxt->spaceTab[ctxt->spaceNr] = -1;
1454 return(ret);
1455}
1456
1457/*
1458 * Macros for accessing the content. Those should be used only by the parser,
1459 * and not exported.
1460 *
1461 * Dirty macros, i.e. one often need to make assumption on the context to
1462 * use them
1463 *
1464 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1465 * To be used with extreme caution since operations consuming
1466 * characters may move the input buffer to a different location !
1467 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1468 * This should be used internally by the parser
1469 * only to compare to ASCII values otherwise it would break when
1470 * running with UTF-8 encoding.
1471 * RAW same as CUR but in the input buffer, bypass any token
1472 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII-based substrings.
1475 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001476 * strings without newlines within the parser.
1477 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1478 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001479 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1480 *
1481 * NEXT Skip to the next character, this does the proper decoding
1482 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001483 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001484 * CUR_CHAR(l) returns the current unicode character (int), set l
1485 * to the number of xmlChars used for the encoding [0-5].
1486 * CUR_SCHAR same but operate on a string instead of the context
1487 * COPY_BUF copy the current unicode char to the target buffer, increment
1488 * the index
1489 * GROW, SHRINK handling of input buffers
1490 */
1491
Daniel Veillardfdc91562002-07-01 21:52:03 +00001492#define RAW (*ctxt->input->cur)
1493#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +00001494#define NXT(val) ctxt->input->cur[(val)]
1495#define CUR_PTR ctxt->input->cur
1496
/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are exactly
 * c1 ... cn. The bytes are compared as unsigned char and the comparison
 * stops at the first mismatch, so no byte past a mismatch is read.
 *
 * Every argument is parenthesised in the expansion so that expressions
 * such as "ptr + 1" are cast and indexed as a whole. Note that s is still
 * expanded once per compared byte: do not pass an expression with side
 * effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1514
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the character counter and to the column (so the skipped chars must
 * not contain a newline). val is evaluated exactly once.
 * Afterwards, if the new current char is '%' the PE reference handler
 * is invoked, and if the input is exhausted and cannot be grown the
 * input is popped.
 */
#define SKIP(val) do {                                                  \
    int xml_skip_n = (val);                                             \
    ctxt->nbChars += xml_skip_n;                                        \
    ctxt->input->cur += xml_skip_n;                                     \
    ctxt->input->col += xml_skip_n;                                     \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1522
/*
 * SKIPL(val): same as SKIP but the skipped xmlChars may contain
 * newlines: each one is examined to keep the line and column counters
 * up to date. val is evaluated exactly once, before the loop, so that
 * an expression argument cannot be mis-parsed inside the loop test.
 * Afterwards, if the new current char is '%' the PE reference handler
 * is invoked, and if the input is exhausted and cannot be grown the
 * input is popped.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    int skipl_max = (val);                                              \
    for (skipl = 0; skipl < skipl_max; skipl++) {                       \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1537
Daniel Veillarda880b122003-04-21 21:36:41 +00001538#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001539 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1540 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001541 xmlSHRINK (ctxt);
1542
1543static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1544 xmlParserInputShrink(ctxt->input);
1545 if ((*ctxt->input->cur == 0) &&
1546 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1547 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001548 }
Owen Taylor3473f882001-02-23 17:55:21 +00001549
Daniel Veillarda880b122003-04-21 21:36:41 +00001550#define GROW if ((ctxt->progressive == 0) && \
1551 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001552 xmlGROW (ctxt);
1553
1554static void xmlGROW (xmlParserCtxtPtr ctxt) {
1555 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1556 if ((*ctxt->input->cur == 0) &&
1557 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1558 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001559}
Owen Taylor3473f882001-02-23 17:55:21 +00001560
/* SKIP_BLANKS: skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: move to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one xmlChar, bumping the column and the character
 * counter, then try to grow the input if the new current char is 0.
 * The line counter is not touched, so the skipped char must not be a
 * newline. The expansion is a plain brace block, not a do/while(0).
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A newline increments the line counter and resets the column to 1,
 * anything else increments the column by one. If the new current char
 * is '%' the PE reference handler is invoked.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)
1580
/* CUR_CHAR(l): current character of the context input, its length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same but reading from the string s instead of the input */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append the character v at index i of buffer b and
 * advance i. When l is 1 the value is stored directly as a single
 * xmlChar, otherwise it is encoded by xmlCopyCharMultiByte() and i is
 * advanced by the value that function returns.
 *
 * The expansion is a bare if/else statement without trailing semicolon:
 * always use it as a full statement inside a braced block.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001587
1588/**
1589 * xmlSkipBlankChars:
1590 * @ctxt: the XML parser context
1591 *
1592 * skip all blanks character found at that point in the input streams.
1593 * It pops up finished entities in the process if allowable at that point.
1594 *
1595 * Returns the number of space chars skipped
1596 */
1597
1598int
1599xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001600 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001601
1602 /*
1603 * It's Okay to use CUR/NEXT here since all the blanks are on
1604 * the ASCII range.
1605 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001606 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1607 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001608 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001609 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001610 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001611 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001612 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001613 if (*cur == '\n') {
1614 ctxt->input->line++; ctxt->input->col = 1;
1615 }
1616 cur++;
1617 res++;
1618 if (*cur == 0) {
1619 ctxt->input->cur = cur;
1620 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1621 cur = ctxt->input->cur;
1622 }
1623 }
1624 ctxt->input->cur = cur;
1625 } else {
1626 int cur;
1627 do {
1628 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001629 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001630 NEXT;
1631 cur = CUR;
1632 res++;
1633 }
1634 while ((cur == 0) && (ctxt->inputNr > 1) &&
1635 (ctxt->instate != XML_PARSER_COMMENT)) {
1636 xmlPopInput(ctxt);
1637 cur = CUR;
1638 }
1639 /*
1640 * Need to handle support of entities branching here
1641 */
1642 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1643 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1644 }
Owen Taylor3473f882001-02-23 17:55:21 +00001645 return(res);
1646}
1647
1648/************************************************************************
1649 * *
1650 * Commodity functions to handle entities *
1651 * *
1652 ************************************************************************/
1653
1654/**
1655 * xmlPopInput:
1656 * @ctxt: an XML parser context
1657 *
1658 * xmlPopInput: the current input pointed by ctxt->input came to an end
1659 * pop it and return the next char.
1660 *
1661 * Returns the current xmlChar in the parser context
1662 */
1663xmlChar
1664xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001665 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001666 if (xmlParserDebugEntities)
1667 xmlGenericError(xmlGenericErrorContext,
1668 "Popping input %d\n", ctxt->inputNr);
1669 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001670 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001671 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1672 return(xmlPopInput(ctxt));
1673 return(CUR);
1674}
1675
1676/**
1677 * xmlPushInput:
1678 * @ctxt: an XML parser context
1679 * @input: an XML parser input fragment (entity, XML fragment ...).
1680 *
1681 * xmlPushInput: switch to a new input stream which is stacked on top
1682 * of the previous one(s).
1683 */
1684void
1685xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1686 if (input == NULL) return;
1687
1688 if (xmlParserDebugEntities) {
1689 if ((ctxt->input != NULL) && (ctxt->input->filename))
1690 xmlGenericError(xmlGenericErrorContext,
1691 "%s(%d): ", ctxt->input->filename,
1692 ctxt->input->line);
1693 xmlGenericError(xmlGenericErrorContext,
1694 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1695 }
1696 inputPush(ctxt, input);
1697 GROW;
1698}
1699
1700/**
1701 * xmlParseCharRef:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse Reference declarations
1705 *
1706 * [66] CharRef ::= '&#' [0-9]+ ';' |
1707 * '&#x' [0-9a-fA-F]+ ';'
1708 *
1709 * [ WFC: Legal Character ]
1710 * Characters referred to using character references must match the
1711 * production for Char.
1712 *
1713 * Returns the value parsed (as an int), 0 in case of error
1714 */
1715int
1716xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001717 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001718 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001719 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Owen Taylor3473f882001-02-23 17:55:21 +00001721 /*
1722 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1723 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001725 (NXT(2) == 'x')) {
1726 SKIP(3);
1727 GROW;
1728 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001729 if (count++ > 20) {
1730 count = 0;
1731 GROW;
1732 }
1733 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001734 val = val * 16 + (CUR - '0');
1735 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1736 val = val * 16 + (CUR - 'a') + 10;
1737 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1738 val = val * 16 + (CUR - 'A') + 10;
1739 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001740 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001741 val = 0;
1742 break;
1743 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001744 if (val > 0x10FFFF)
1745 outofrange = val;
1746
Owen Taylor3473f882001-02-23 17:55:21 +00001747 NEXT;
1748 count++;
1749 }
1750 if (RAW == ';') {
1751 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001752 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001753 ctxt->nbChars ++;
1754 ctxt->input->cur++;
1755 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001756 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001757 SKIP(2);
1758 GROW;
1759 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001760 if (count++ > 20) {
1761 count = 0;
1762 GROW;
1763 }
1764 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001765 val = val * 10 + (CUR - '0');
1766 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001768 val = 0;
1769 break;
1770 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001771 if (val > 0x10FFFF)
1772 outofrange = val;
1773
Owen Taylor3473f882001-02-23 17:55:21 +00001774 NEXT;
1775 count++;
1776 }
1777 if (RAW == ';') {
1778 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001779 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001780 ctxt->nbChars ++;
1781 ctxt->input->cur++;
1782 }
1783 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001784 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001785 }
1786
1787 /*
1788 * [ WFC: Legal Character ]
1789 * Characters referred to using character references must match the
1790 * production for Char.
1791 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(val);
1794 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1796 "xmlParseCharRef: invalid xmlChar value %d\n",
1797 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001798 }
1799 return(0);
1800}
1801
1802/**
1803 * xmlParseStringCharRef:
1804 * @ctxt: an XML parser context
1805 * @str: a pointer to an index in the string
1806 *
1807 * parse Reference declarations, variant parsing from a string rather
1808 * than an an input flow.
1809 *
1810 * [66] CharRef ::= '&#' [0-9]+ ';' |
1811 * '&#x' [0-9a-fA-F]+ ';'
1812 *
1813 * [ WFC: Legal Character ]
1814 * Characters referred to using character references must match the
1815 * production for Char.
1816 *
1817 * Returns the value parsed (as an int), 0 in case of error, str will be
1818 * updated to the current value of the index
1819 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001820static int
Owen Taylor3473f882001-02-23 17:55:21 +00001821xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1822 const xmlChar *ptr;
1823 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001824 unsigned int val = 0;
1825 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001826
1827 if ((str == NULL) || (*str == NULL)) return(0);
1828 ptr = *str;
1829 cur = *ptr;
1830 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1831 ptr += 3;
1832 cur = *ptr;
1833 while (cur != ';') { /* Non input consuming loop */
1834 if ((cur >= '0') && (cur <= '9'))
1835 val = val * 16 + (cur - '0');
1836 else if ((cur >= 'a') && (cur <= 'f'))
1837 val = val * 16 + (cur - 'a') + 10;
1838 else if ((cur >= 'A') && (cur <= 'F'))
1839 val = val * 16 + (cur - 'A') + 10;
1840 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 val = 0;
1843 break;
1844 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001845 if (val > 0x10FFFF)
1846 outofrange = val;
1847
Owen Taylor3473f882001-02-23 17:55:21 +00001848 ptr++;
1849 cur = *ptr;
1850 }
1851 if (cur == ';')
1852 ptr++;
1853 } else if ((cur == '&') && (ptr[1] == '#')){
1854 ptr += 2;
1855 cur = *ptr;
1856 while (cur != ';') { /* Non input consuming loops */
1857 if ((cur >= '0') && (cur <= '9'))
1858 val = val * 10 + (cur - '0');
1859 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001860 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001861 val = 0;
1862 break;
1863 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001864 if (val > 0x10FFFF)
1865 outofrange = val;
1866
Owen Taylor3473f882001-02-23 17:55:21 +00001867 ptr++;
1868 cur = *ptr;
1869 }
1870 if (cur == ';')
1871 ptr++;
1872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001873 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001874 return(0);
1875 }
1876 *str = ptr;
1877
1878 /*
1879 * [ WFC: Legal Character ]
1880 * Characters referred to using character references must match the
1881 * production for Char.
1882 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001883 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 return(val);
1885 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001886 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1887 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1888 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001889 }
1890 return(0);
1891}
1892
1893/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001894 * xmlNewBlanksWrapperInputStream:
1895 * @ctxt: an XML parser context
1896 * @entity: an Entity pointer
1897 *
1898 * Create a new input stream for wrapping
1899 * blanks around a PEReference
1900 *
1901 * Returns the new input stream or NULL
1902 */
1903
1904static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1905
Daniel Veillardf4862f02002-09-10 11:13:43 +00001906static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001907xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1908 xmlParserInputPtr input;
1909 xmlChar *buffer;
1910 size_t length;
1911 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1913 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001914 return(NULL);
1915 }
1916 if (xmlParserDebugEntities)
1917 xmlGenericError(xmlGenericErrorContext,
1918 "new blanks wrapper for entity: %s\n", entity->name);
1919 input = xmlNewInputStream(ctxt);
1920 if (input == NULL) {
1921 return(NULL);
1922 }
1923 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001924 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001926 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001927 return(NULL);
1928 }
1929 buffer [0] = ' ';
1930 buffer [1] = '%';
1931 buffer [length-3] = ';';
1932 buffer [length-2] = ' ';
1933 buffer [length-1] = 0;
1934 memcpy(buffer + 2, entity->name, length - 5);
1935 input->free = deallocblankswrapper;
1936 input->base = buffer;
1937 input->cur = buffer;
1938 input->length = length;
1939 input->end = &buffer[length];
1940 return(input);
1941}
1942
1943/**
Owen Taylor3473f882001-02-23 17:55:21 +00001944 * xmlParserHandlePEReference:
1945 * @ctxt: the parser context
1946 *
1947 * [69] PEReference ::= '%' Name ';'
1948 *
1949 * [ WFC: No Recursion ]
1950 * A parsed entity must not contain a recursive
1951 * reference to itself, either directly or indirectly.
1952 *
1953 * [ WFC: Entity Declared ]
1954 * In a document without any DTD, a document with only an internal DTD
1955 * subset which contains no parameter entity references, or a document
1956 * with "standalone='yes'", ... ... The declaration of a parameter
1957 * entity must precede any reference to it...
1958 *
1959 * [ VC: Entity Declared ]
1960 * In a document with an external subset or external parameter entities
1961 * with "standalone='no'", ... ... The declaration of a parameter entity
1962 * must precede any reference to it...
1963 *
1964 * [ WFC: In DTD ]
1965 * Parameter-entity references may only appear in the DTD.
1966 * NOTE: misleading but this is handled.
1967 *
1968 * A PEReference may have been detected in the current input stream
1969 * the handling is done accordingly to
1970 * http://www.w3.org/TR/REC-xml#entproc
1971 * i.e.
1972 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001973 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001974 */
1975void
1976xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001977 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001978 xmlEntityPtr entity = NULL;
1979 xmlParserInputPtr input;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 if (RAW != '%') return;
1982 switch(ctxt->instate) {
1983 case XML_PARSER_CDATA_SECTION:
1984 return;
1985 case XML_PARSER_COMMENT:
1986 return;
1987 case XML_PARSER_START_TAG:
1988 return;
1989 case XML_PARSER_END_TAG:
1990 return;
1991 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001992 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001993 return;
1994 case XML_PARSER_PROLOG:
1995 case XML_PARSER_START:
1996 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001997 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001998 return;
1999 case XML_PARSER_ENTITY_DECL:
2000 case XML_PARSER_CONTENT:
2001 case XML_PARSER_ATTRIBUTE_VALUE:
2002 case XML_PARSER_PI:
2003 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002004 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002005 /* we just ignore it there */
2006 return;
2007 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002008 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 return;
2010 case XML_PARSER_ENTITY_VALUE:
2011 /*
2012 * NOTE: in the case of entity values, we don't do the
2013 * substitution here since we need the literal
2014 * entity value to be able to save the internal
2015 * subset of the document.
2016 * This will be handled by xmlStringDecodeEntities
2017 */
2018 return;
2019 case XML_PARSER_DTD:
2020 /*
2021 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2022 * In the internal DTD subset, parameter-entity references
2023 * can occur only where markup declarations can occur, not
2024 * within markup declarations.
2025 * In that case this is handled in xmlParseMarkupDecl
2026 */
2027 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2028 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002029 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002030 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002031 break;
2032 case XML_PARSER_IGNORE:
2033 return;
2034 }
2035
2036 NEXT;
2037 name = xmlParseName(ctxt);
2038 if (xmlParserDebugEntities)
2039 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002040 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002041 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002042 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002043 } else {
2044 if (RAW == ';') {
2045 NEXT;
2046 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2047 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2048 if (entity == NULL) {
2049
2050 /*
2051 * [ WFC: Entity Declared ]
2052 * In a document without any DTD, a document with only an
2053 * internal DTD subset which contains no parameter entity
2054 * references, or a document with "standalone='yes'", ...
2055 * ... The declaration of a parameter entity must precede
2056 * any reference to it...
2057 */
2058 if ((ctxt->standalone == 1) ||
2059 ((ctxt->hasExternalSubset == 0) &&
2060 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002062 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002063 } else {
2064 /*
2065 * [ VC: Entity Declared ]
2066 * In a document with an external subset or external
2067 * parameter entities with "standalone='no'", ...
2068 * ... The declaration of a parameter entity must precede
2069 * any reference to it...
2070 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002071 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2072 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2073 "PEReference: %%%s; not found\n",
2074 name);
2075 } else
2076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2077 "PEReference: %%%s; not found\n",
2078 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->valid = 0;
2080 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002081 } else if (ctxt->input->free != deallocblankswrapper) {
2082 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2083 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 } else {
2085 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2086 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002087 xmlChar start[4];
2088 xmlCharEncoding enc;
2089
Owen Taylor3473f882001-02-23 17:55:21 +00002090 /*
2091 * handle the extra spaces added before and after
2092 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002093 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002094 */
2095 input = xmlNewEntityInputStream(ctxt, entity);
2096 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002097
2098 /*
2099 * Get the 4 first bytes and decode the charset
2100 * if enc != XML_CHAR_ENCODING_NONE
2101 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002102 * Note that, since we may have some non-UTF8
2103 * encoding (like UTF16, bug 135229), the 'length'
2104 * is not known, but we can calculate based upon
2105 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002106 */
2107 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002108 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002109 start[0] = RAW;
2110 start[1] = NXT(1);
2111 start[2] = NXT(2);
2112 start[3] = NXT(3);
2113 enc = xmlDetectCharEncoding(start, 4);
2114 if (enc != XML_CHAR_ENCODING_NONE) {
2115 xmlSwitchEncoding(ctxt, enc);
2116 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002117 }
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002120 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2121 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002122 xmlParseTextDecl(ctxt);
2123 }
Owen Taylor3473f882001-02-23 17:55:21 +00002124 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002125 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2126 "PEReference: %s is not a parameter entity\n",
2127 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002128 }
2129 }
2130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002131 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002132 }
Owen Taylor3473f882001-02-23 17:55:21 +00002133 }
2134}
2135
/*
 * Macro used to grow the current buffer.
 *
 * The size of the buffer is kept in a variable named after it with a
 * "_size" suffix; it is doubled and the buffer reallocated accordingly.
 * On allocation failure the macro jumps to the "mem_error" label, which
 * the calling function has to provide; the buffer variable then still
 * holds the old block.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown;                                                     \
    buffer##_size = buffer##_size * 2;                                  \
    grown = (xmlChar *) xmlRealloc(buffer,                              \
                                   buffer##_size * sizeof(xmlChar));    \
    if (grown == NULL)                                                  \
        goto mem_error;                                                 \
    buffer = grown;                                                     \
}
2147
2148/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002149 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002150 * @ctxt: the parser context
2151 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002152 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002153 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2154 * @end: an end marker xmlChar, 0 if none
2155 * @end2: an end marker xmlChar, 0 if none
2156 * @end3: an end marker xmlChar, 0 if none
2157 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002158 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002159 *
2160 * [67] Reference ::= EntityRef | CharRef
2161 *
2162 * [69] PEReference ::= '%' Name ';'
2163 *
2164 * Returns A newly allocated string with the substitution done. The caller
2165 * must deallocate it !
2166 */
2167xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002168xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2169 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002170 xmlChar *buffer = NULL;
2171 int buffer_size = 0;
2172
2173 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002174 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002175 xmlEntityPtr ent;
2176 int c,l;
2177 int nbchars = 0;
2178
Daniel Veillarda82b1822004-11-08 16:24:57 +00002179 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002180 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002181 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002184 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(NULL);
2186 }
2187
2188 /*
2189 * allocate a translation buffer.
2190 */
2191 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002192 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002193 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002194
2195 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002196 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002197 * we are operating on already parsed values.
2198 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002199 if (str < last)
2200 c = CUR_SCHAR(str, l);
2201 else
2202 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203 while ((c != 0) && (c != end) && /* non input consuming loop */
2204 (c != end2) && (c != end3)) {
2205
2206 if (c == 0) break;
2207 if ((c == '&') && (str[1] == '#')) {
2208 int val = xmlParseStringCharRef(ctxt, &str);
2209 if (val != 0) {
2210 COPY_BUF(0,buffer,nbchars,val);
2211 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002212 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
Owen Taylor3473f882001-02-23 17:55:21 +00002215 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "String decoding Entity Reference: %.30s\n",
2219 str);
2220 ent = xmlParseStringEntityRef(ctxt, &str);
2221 if ((ent != NULL) &&
2222 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2223 if (ent->content != NULL) {
2224 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002225 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2226 growBuffer(buffer);
2227 }
Owen Taylor3473f882001-02-23 17:55:21 +00002228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002229 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2230 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002231 }
2232 } else if ((ent != NULL) && (ent->content != NULL)) {
2233 xmlChar *rep;
2234
2235 ctxt->depth++;
2236 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2237 0, 0, 0);
2238 ctxt->depth--;
2239 if (rep != NULL) {
2240 current = rep;
2241 while (*current != 0) { /* non input consuming loop */
2242 buffer[nbchars++] = *current++;
2243 if (nbchars >
2244 buffer_size - XML_PARSER_BUFFER_SIZE) {
2245 growBuffer(buffer);
2246 }
2247 }
2248 xmlFree(rep);
2249 }
2250 } else if (ent != NULL) {
2251 int i = xmlStrlen(ent->name);
2252 const xmlChar *cur = ent->name;
2253
2254 buffer[nbchars++] = '&';
2255 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2256 growBuffer(buffer);
2257 }
2258 for (;i > 0;i--)
2259 buffer[nbchars++] = *cur++;
2260 buffer[nbchars++] = ';';
2261 }
2262 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2263 if (xmlParserDebugEntities)
2264 xmlGenericError(xmlGenericErrorContext,
2265 "String decoding PE Reference: %.30s\n", str);
2266 ent = xmlParseStringPEReference(ctxt, &str);
2267 if (ent != NULL) {
2268 xmlChar *rep;
2269
2270 ctxt->depth++;
2271 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2272 0, 0, 0);
2273 ctxt->depth--;
2274 if (rep != NULL) {
2275 current = rep;
2276 while (*current != 0) { /* non input consuming loop */
2277 buffer[nbchars++] = *current++;
2278 if (nbchars >
2279 buffer_size - XML_PARSER_BUFFER_SIZE) {
2280 growBuffer(buffer);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 }
2286 } else {
2287 COPY_BUF(l,buffer,nbchars,c);
2288 str += l;
2289 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2290 growBuffer(buffer);
2291 }
2292 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002293 if (str < last)
2294 c = CUR_SCHAR(str, l);
2295 else
2296 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002297 }
2298 buffer[nbchars++] = 0;
2299 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002300
2301mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304}
2305
Daniel Veillarde57ec792003-09-10 10:50:59 +00002306/**
2307 * xmlStringDecodeEntities:
2308 * @ctxt: the parser context
2309 * @str: the input string
2310 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2311 * @end: an end marker xmlChar, 0 if none
2312 * @end2: an end marker xmlChar, 0 if none
2313 * @end3: an end marker xmlChar, 0 if none
2314 *
2315 * Takes a entity string content and process to do the adequate substitutions.
2316 *
2317 * [67] Reference ::= EntityRef | CharRef
2318 *
2319 * [69] PEReference ::= '%' Name ';'
2320 *
2321 * Returns A newly allocated string with the substitution done. The caller
2322 * must deallocate it !
2323 */
2324xmlChar *
2325xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2326 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002327 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002328 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2329 end, end2, end3));
2330}
Owen Taylor3473f882001-02-23 17:55:21 +00002331
/************************************************************************
 *                                                                      *
 *          Commodity functions, cleanup needed ?                       *
 *                                                                      *
 ************************************************************************/
2337
2338/**
2339 * areBlanks:
2340 * @ctxt: an XML parser context
2341 * @str: a xmlChar *
2342 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002343 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002344 *
2345 * Is this a sequence of blank chars that one can ignore ?
2346 *
2347 * Returns 1 if ignorable 0 otherwise.
2348 */
2349
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002350static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2351 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 int i, ret;
2353 xmlNodePtr lastChild;
2354
Daniel Veillard05c13a22001-09-09 08:38:09 +00002355 /*
2356 * Don't spend time trying to differentiate them, the same callback is
2357 * used !
2358 */
2359 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002360 return(0);
2361
Owen Taylor3473f882001-02-23 17:55:21 +00002362 /*
2363 * Check for xml:space value.
2364 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002365 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2366 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002367 return(0);
2368
2369 /*
2370 * Check that the string is made of blanks
2371 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002372 if (blank_chars == 0) {
2373 for (i = 0;i < len;i++)
2374 if (!(IS_BLANK_CH(str[i]))) return(0);
2375 }
Owen Taylor3473f882001-02-23 17:55:21 +00002376
2377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002378 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002379 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002380 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 if (ctxt->myDoc != NULL) {
2382 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2383 if (ret == 0) return(1);
2384 if (ret == 1) return(0);
2385 }
2386
2387 /*
2388 * Otherwise, heuristic :-\
2389 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002390 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002391 if ((ctxt->node->children == NULL) &&
2392 (RAW == '<') && (NXT(1) == '/')) return(0);
2393
2394 lastChild = xmlGetLastChild(ctxt->node);
2395 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002396 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2397 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002398 } else if (xmlNodeIsText(lastChild))
2399 return(0);
2400 else if ((ctxt->node->children != NULL) &&
2401 (xmlNodeIsText(ctxt->node->children)))
2402 return(0);
2403 return(1);
2404}
2405
/************************************************************************
 *                                                                      *
 *          Extra stuff for namespace support                           *
 *          Relates to http://www.w3.org/TR/WD-xml-names                *
 *                                                                      *
 ************************************************************************/
2412
2413/**
2414 * xmlSplitQName:
2415 * @ctxt: an XML parser context
2416 * @name: an XML parser context
2417 * @prefix: a xmlChar **
2418 *
2419 * parse an UTF8 encoded XML qualified name string
2420 *
2421 * [NS 5] QName ::= (Prefix ':')? LocalPart
2422 *
2423 * [NS 6] Prefix ::= NCName
2424 *
2425 * [NS 7] LocalPart ::= NCName
2426 *
2427 * Returns the local part, and prefix is updated
2428 * to get the Prefix if any.
2429 */
2430
2431xmlChar *
2432xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2433 xmlChar buf[XML_MAX_NAMELEN + 5];
2434 xmlChar *buffer = NULL;
2435 int len = 0;
2436 int max = XML_MAX_NAMELEN;
2437 xmlChar *ret = NULL;
2438 const xmlChar *cur = name;
2439 int c;
2440
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002441 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 *prefix = NULL;
2443
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002444 if (cur == NULL) return(NULL);
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446#ifndef XML_XML_NAMESPACE
2447 /* xml: prefix is not really a namespace */
2448 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2449 (cur[2] == 'l') && (cur[3] == ':'))
2450 return(xmlStrdup(name));
2451#endif
2452
Daniel Veillard597bc482003-07-24 16:08:28 +00002453 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002454 if (cur[0] == ':')
2455 return(xmlStrdup(name));
2456
2457 c = *cur++;
2458 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2459 buf[len++] = c;
2460 c = *cur++;
2461 }
2462 if (len >= max) {
2463 /*
2464 * Okay someone managed to make a huge name, so he's ready to pay
2465 * for the processing speed.
2466 */
2467 max = len * 2;
2468
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 return(NULL);
2473 }
2474 memcpy(buffer, buf, len);
2475 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2476 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002477 xmlChar *tmp;
2478
Owen Taylor3473f882001-02-23 17:55:21 +00002479 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002480 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002481 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002482 if (tmp == NULL) {
2483 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return(NULL);
2486 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002487 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002488 }
2489 buffer[len++] = c;
2490 c = *cur++;
2491 }
2492 buffer[len] = 0;
2493 }
2494
Daniel Veillard597bc482003-07-24 16:08:28 +00002495 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002496 if (buffer != NULL)
2497 xmlFree(buffer);
2498 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002499 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002500 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 if (buffer == NULL)
2503 ret = xmlStrndup(buf, len);
2504 else {
2505 ret = buffer;
2506 buffer = NULL;
2507 max = XML_MAX_NAMELEN;
2508 }
2509
2510
2511 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002512 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002513 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002514 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002515 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002516 }
Owen Taylor3473f882001-02-23 17:55:21 +00002517 len = 0;
2518
Daniel Veillardbb284f42002-10-16 18:02:47 +00002519 /*
2520 * Check that the first character is proper to start
2521 * a new name
2522 */
2523 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2524 ((c >= 0x41) && (c <= 0x5A)) ||
2525 (c == '_') || (c == ':'))) {
2526 int l;
2527 int first = CUR_SCHAR(cur, l);
2528
2529 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002530 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002531 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002532 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002533 }
2534 }
2535 cur++;
2536
Owen Taylor3473f882001-02-23 17:55:21 +00002537 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2538 buf[len++] = c;
2539 c = *cur++;
2540 }
2541 if (len >= max) {
2542 /*
2543 * Okay someone managed to make a huge name, so he's ready to pay
2544 * for the processing speed.
2545 */
2546 max = len * 2;
2547
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002548 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002550 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 return(NULL);
2552 }
2553 memcpy(buffer, buf, len);
2554 while (c != 0) { /* tested bigname2.xml */
2555 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002556 xmlChar *tmp;
2557
Owen Taylor3473f882001-02-23 17:55:21 +00002558 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002559 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002560 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002561 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002563 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002564 return(NULL);
2565 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002566 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 }
2568 buffer[len++] = c;
2569 c = *cur++;
2570 }
2571 buffer[len] = 0;
2572 }
2573
2574 if (buffer == NULL)
2575 ret = xmlStrndup(buf, len);
2576 else {
2577 ret = buffer;
2578 }
2579 }
2580
2581 return(ret);
2582}
2583
/************************************************************************
 *                                                                      *
 *          The parser itself                                           *
 *          Relates to http://www.w3.org/TR/REC-xml                     *
 *                                                                      *
 ************************************************************************/
2590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002591static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002592static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002593 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002594
Owen Taylor3473f882001-02-23 17:55:21 +00002595/**
2596 * xmlParseName:
2597 * @ctxt: an XML parser context
2598 *
2599 * parse an XML name.
2600 *
2601 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2602 * CombiningChar | Extender
2603 *
2604 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2605 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002606 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002607 *
2608 * Returns the Name parsed or NULL
2609 */
2610
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002611const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002612xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002613 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002614 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002615 int count = 0;
2616
2617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002618
2619 /*
2620 * Accelerator for simple ASCII names
2621 */
2622 in = ctxt->input->cur;
2623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2624 ((*in >= 0x41) && (*in <= 0x5A)) ||
2625 (*in == '_') || (*in == ':')) {
2626 in++;
2627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2628 ((*in >= 0x41) && (*in <= 0x5A)) ||
2629 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002630 (*in == '_') || (*in == '-') ||
2631 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002632 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002633 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002634 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002635 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002636 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002637 ctxt->nbChars += count;
2638 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 if (ret == NULL)
2640 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002641 return(ret);
2642 }
2643 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002644 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002646
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647/**
2648 * xmlParseNameAndCompare:
2649 * @ctxt: an XML parser context
2650 *
2651 * parse an XML name and compares for match
2652 * (specialized for endtag parsing)
2653 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002654 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2655 * and the name for mismatch
2656 */
2657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002658static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002659xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002660 register const xmlChar *cmp = other;
2661 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002662 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002663
2664 GROW;
2665
2666 in = ctxt->input->cur;
2667 while (*in != 0 && *in == *cmp) {
2668 ++in;
2669 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002670 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002671 }
William M. Brack76e95df2003-10-18 16:20:14 +00002672 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002673 /* success */
2674 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002675 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002676 }
2677 /* failure (or end of input buffer), check with full function */
2678 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002679 /* strings coming from the dictionnary direct compare possible */
2680 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002681 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002682 }
2683 return ret;
2684}
2685
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002686static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002687xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002688 int len = 0, l;
2689 int c;
2690 int count = 0;
2691
2692 /*
2693 * Handler for more complex cases
2694 */
2695 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002696 c = CUR_CHAR(l);
2697 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2698 (!IS_LETTER(c) && (c != '_') &&
2699 (c != ':'))) {
2700 return(NULL);
2701 }
2702
2703 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002704 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002705 (c == '.') || (c == '-') ||
2706 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002707 (IS_COMBINING(c)) ||
2708 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (count++ > 100) {
2710 count = 0;
2711 GROW;
2712 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002713 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 NEXTL(l);
2715 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Daniel Veillard96688262005-08-23 18:14:12 +00002717 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2718 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002719 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002720}
2721
2722/**
2723 * xmlParseStringName:
2724 * @ctxt: an XML parser context
2725 * @str: a pointer to the string pointer (IN/OUT)
2726 *
2727 * parse an XML name.
2728 *
2729 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2730 * CombiningChar | Extender
2731 *
2732 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002734 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002735 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002736 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * is updated to the current location in the string.
2738 */
2739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002740static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002741xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2742 xmlChar buf[XML_MAX_NAMELEN + 5];
2743 const xmlChar *cur = *str;
2744 int len = 0, l;
2745 int c;
2746
2747 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002748 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002749 (c != ':')) {
2750 return(NULL);
2751 }
2752
William M. Brack871611b2003-10-18 04:53:14 +00002753 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002754 (c == '.') || (c == '-') ||
2755 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002756 (IS_COMBINING(c)) ||
2757 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002758 COPY_BUF(l,buf,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2762 /*
2763 * Okay someone managed to make a huge name, so he's ready to pay
2764 * for the processing speed.
2765 */
2766 xmlChar *buffer;
2767 int max = len * 2;
2768
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002769 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002770 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772 return(NULL);
2773 }
2774 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002775 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002776 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002777 (c == '.') || (c == '-') ||
2778 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002779 (IS_COMBINING(c)) ||
2780 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002782 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002783 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002784 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002785 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002786 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002788 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 return(NULL);
2790 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002791 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 }
2793 COPY_BUF(l,buffer,len,c);
2794 cur += l;
2795 c = CUR_SCHAR(cur, l);
2796 }
2797 buffer[len] = 0;
2798 *str = cur;
2799 return(buffer);
2800 }
2801 }
2802 *str = cur;
2803 return(xmlStrndup(buf, len));
2804}
2805
2806/**
2807 * xmlParseNmtoken:
2808 * @ctxt: an XML parser context
2809 *
2810 * parse an XML Nmtoken.
2811 *
2812 * [7] Nmtoken ::= (NameChar)+
2813 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002814 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002815 *
2816 * Returns the Nmtoken parsed or NULL
2817 */
2818
2819xmlChar *
2820xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2821 xmlChar buf[XML_MAX_NAMELEN + 5];
2822 int len = 0, l;
2823 int c;
2824 int count = 0;
2825
2826 GROW;
2827 c = CUR_CHAR(l);
2828
William M. Brack871611b2003-10-18 04:53:14 +00002829 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002830 (c == '.') || (c == '-') ||
2831 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002832 (IS_COMBINING(c)) ||
2833 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 if (count++ > 100) {
2835 count = 0;
2836 GROW;
2837 }
2838 COPY_BUF(l,buf,len,c);
2839 NEXTL(l);
2840 c = CUR_CHAR(l);
2841 if (len >= XML_MAX_NAMELEN) {
2842 /*
2843 * Okay someone managed to make a huge token, so he's ready to pay
2844 * for the processing speed.
2845 */
2846 xmlChar *buffer;
2847 int max = len * 2;
2848
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002849 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002850 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 return(NULL);
2853 }
2854 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002855 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002856 (c == '.') || (c == '-') ||
2857 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002858 (IS_COMBINING(c)) ||
2859 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (count++ > 100) {
2861 count = 0;
2862 GROW;
2863 }
2864 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002865 xmlChar *tmp;
2866
Owen Taylor3473f882001-02-23 17:55:21 +00002867 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002868 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002869 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002870 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002871 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002872 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 return(NULL);
2874 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002875 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002876 }
2877 COPY_BUF(l,buffer,len,c);
2878 NEXTL(l);
2879 c = CUR_CHAR(l);
2880 }
2881 buffer[len] = 0;
2882 return(buffer);
2883 }
2884 }
2885 if (len == 0)
2886 return(NULL);
2887 return(xmlStrndup(buf, len));
2888}
2889
2890/**
2891 * xmlParseEntityValue:
2892 * @ctxt: an XML parser context
2893 * @orig: if non-NULL store a copy of the original entity value
2894 *
2895 * parse a value for ENTITY declarations
2896 *
2897 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2898 * "'" ([^%&'] | PEReference | Reference)* "'"
2899 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
2902
2903xmlChar *
2904xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2905 xmlChar *buf = NULL;
2906 int len = 0;
2907 int size = XML_PARSER_BUFFER_SIZE;
2908 int c, l;
2909 xmlChar stop;
2910 xmlChar *ret = NULL;
2911 const xmlChar *cur = NULL;
2912 xmlParserInputPtr input;
2913
2914 if (RAW == '"') stop = '"';
2915 else if (RAW == '\'') stop = '\'';
2916 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 return(NULL);
2919 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002920 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002921 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002922 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(NULL);
2924 }
2925
2926 /*
2927 * The content of the entity definition is copied in a buffer.
2928 */
2929
2930 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2931 input = ctxt->input;
2932 GROW;
2933 NEXT;
2934 c = CUR_CHAR(l);
2935 /*
2936 * NOTE: 4.4.5 Included in Literal
2937 * When a parameter entity reference appears in a literal entity
2938 * value, ... a single or double quote character in the replacement
2939 * text is always treated as a normal data character and will not
2940 * terminate the literal.
2941 * In practice it means we stop the loop only when back at parsing
2942 * the initial entity and the quote is found
2943 */
William M. Brack871611b2003-10-18 04:53:14 +00002944 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002945 (ctxt->input != input))) {
2946 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 xmlChar *tmp;
2948
Owen Taylor3473f882001-02-23 17:55:21 +00002949 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2951 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
2958 COPY_BUF(l,buf,len,c);
2959 NEXTL(l);
2960 /*
2961 * Pop-up of finished entities.
2962 */
2963 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2964 xmlPopInput(ctxt);
2965
2966 GROW;
2967 c = CUR_CHAR(l);
2968 if (c == 0) {
2969 GROW;
2970 c = CUR_CHAR(l);
2971 }
2972 }
2973 buf[len] = 0;
2974
2975 /*
2976 * Raise problem w.r.t. '&' and '%' being used in non-entities
2977 * reference constructs. Note Charref will be handled in
2978 * xmlStringDecodeEntities()
2979 */
2980 cur = buf;
2981 while (*cur != 0) { /* non input consuming */
2982 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2983 xmlChar *name;
2984 xmlChar tmp = *cur;
2985
2986 cur++;
2987 name = xmlParseStringName(ctxt, &cur);
2988 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002989 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002990 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002991 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002992 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002993 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2994 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 }
2997 if (name != NULL)
2998 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002999 if (*cur == 0)
3000 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003001 }
3002 cur++;
3003 }
3004
3005 /*
3006 * Then PEReference entities are substituted.
3007 */
3008 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003009 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003010 xmlFree(buf);
3011 } else {
3012 NEXT;
3013 /*
3014 * NOTE: 4.4.7 Bypassed
3015 * When a general entity reference appears in the EntityValue in
3016 * an entity declaration, it is bypassed and left as is.
3017 * so XML_SUBSTITUTE_REF is not set here.
3018 */
3019 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3020 0, 0, 0);
3021 if (orig != NULL)
3022 *orig = buf;
3023 else
3024 xmlFree(buf);
3025 }
3026
3027 return(ret);
3028}
3029
3030/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003031 * xmlParseAttValueComplex:
3032 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003033 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003034 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003035 *
3036 * parse a value for an attribute, this is the fallback function
3037 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003038 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003039 *
3040 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3041 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003042static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003043xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003044 xmlChar limit = 0;
3045 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 int len = 0;
3047 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 xmlChar *current = NULL;
3050 xmlEntityPtr ent;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (NXT(0) == '"') {
3053 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3054 limit = '"';
3055 NEXT;
3056 } else if (NXT(0) == '\'') {
3057 limit = '\'';
3058 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3059 NEXT;
3060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003061 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003062 return(NULL);
3063 }
3064
3065 /*
3066 * allocate a translation buffer.
3067 */
3068 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003070 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003071
3072 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003073 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003074 */
3075 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003076 while ((NXT(0) != limit) && /* checked */
3077 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003078 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003079 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (NXT(1) == '#') {
3082 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003083
Owen Taylor3473f882001-02-23 17:55:21 +00003084 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003085 if (ctxt->replaceEntities) {
3086 if (len > buf_size - 10) {
3087 growBuffer(buf);
3088 }
3089 buf[len++] = '&';
3090 } else {
3091 /*
3092 * The reparsing will be done in xmlStringGetNodeList()
3093 * called by the attribute() function in SAX.c
3094 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003095 if (len > buf_size - 10) {
3096 growBuffer(buf);
3097 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 buf[len++] = '&';
3099 buf[len++] = '#';
3100 buf[len++] = '3';
3101 buf[len++] = '8';
3102 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003103 }
3104 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003105 if (len > buf_size - 10) {
3106 growBuffer(buf);
3107 }
Owen Taylor3473f882001-02-23 17:55:21 +00003108 len += xmlCopyChar(0, &buf[len], val);
3109 }
3110 } else {
3111 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003112 if ((ent != NULL) &&
3113 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3114 if (len > buf_size - 10) {
3115 growBuffer(buf);
3116 }
3117 if ((ctxt->replaceEntities == 0) &&
3118 (ent->content[0] == '&')) {
3119 buf[len++] = '&';
3120 buf[len++] = '#';
3121 buf[len++] = '3';
3122 buf[len++] = '8';
3123 buf[len++] = ';';
3124 } else {
3125 buf[len++] = ent->content[0];
3126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003127 } else if ((ent != NULL) &&
3128 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003129 xmlChar *rep;
3130
3131 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3132 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003133 XML_SUBSTITUTE_REF,
3134 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003135 if (rep != NULL) {
3136 current = rep;
3137 while (*current != 0) { /* non input consuming */
3138 buf[len++] = *current++;
3139 if (len > buf_size - 10) {
3140 growBuffer(buf);
3141 }
3142 }
3143 xmlFree(rep);
3144 }
3145 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003146 if (len > buf_size - 10) {
3147 growBuffer(buf);
3148 }
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (ent->content != NULL)
3150 buf[len++] = ent->content[0];
3151 }
3152 } else if (ent != NULL) {
3153 int i = xmlStrlen(ent->name);
3154 const xmlChar *cur = ent->name;
3155
3156 /*
3157 * This may look absurd but is needed to detect
3158 * entities problems
3159 */
3160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3161 (ent->content != NULL)) {
3162 xmlChar *rep;
3163 rep = xmlStringDecodeEntities(ctxt, ent->content,
3164 XML_SUBSTITUTE_REF, 0, 0, 0);
3165 if (rep != NULL)
3166 xmlFree(rep);
3167 }
3168
3169 /*
3170 * Just output the reference
3171 */
3172 buf[len++] = '&';
3173 if (len > buf_size - i - 10) {
3174 growBuffer(buf);
3175 }
3176 for (;i > 0;i--)
3177 buf[len++] = *cur++;
3178 buf[len++] = ';';
3179 }
3180 }
3181 } else {
3182 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003183 if ((len != 0) || (!normalize)) {
3184 if ((!normalize) || (!in_space)) {
3185 COPY_BUF(l,buf,len,0x20);
3186 if (len > buf_size - 10) {
3187 growBuffer(buf);
3188 }
3189 }
3190 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003191 }
3192 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003193 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 COPY_BUF(l,buf,len,c);
3195 if (len > buf_size - 10) {
3196 growBuffer(buf);
3197 }
3198 }
3199 NEXTL(l);
3200 }
3201 GROW;
3202 c = CUR_CHAR(l);
3203 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003204 if ((in_space) && (normalize)) {
3205 while (buf[len - 1] == 0x20) len--;
3206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003207 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003209 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003210 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003211 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3212 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003213 } else
3214 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003215 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003216 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003217
3218mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003219 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003221}
3222
3223/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003224 * xmlParseAttValue:
3225 * @ctxt: an XML parser context
3226 *
3227 * parse a value for an attribute
3228 * Note: the parser won't do substitution of entities here, this
3229 * will be handled later in xmlStringGetNodeList
3230 *
3231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3232 * "'" ([^<&'] | Reference)* "'"
3233 *
3234 * 3.3.3 Attribute-Value Normalization:
3235 * Before the value of an attribute is passed to the application or
3236 * checked for validity, the XML processor must normalize it as follows:
3237 * - a character reference is processed by appending the referenced
3238 * character to the attribute value
3239 * - an entity reference is processed by recursively processing the
3240 * replacement text of the entity
3241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3242 * appending #x20 to the normalized value, except that only a single
3243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3244 * parsed entity or the literal entity value of an internal parsed entity
3245 * - other characters are processed by appending them to the normalized value
3246 * If the declared value is not CDATA, then the XML processor must further
3247 * process the normalized attribute value by discarding any leading and
3248 * trailing space (#x20) characters, and by replacing sequences of space
3249 * (#x20) characters by a single space (#x20) character.
3250 * All attributes for which no declaration has been read should be treated
3251 * by a non-validating parser as if declared CDATA.
3252 *
3253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3254 */
3255
3256
3257xmlChar *
3258xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003261}
3262
3263/**
Owen Taylor3473f882001-02-23 17:55:21 +00003264 * xmlParseSystemLiteral:
3265 * @ctxt: an XML parser context
3266 *
3267 * parse an XML Literal
3268 *
3269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3270 *
3271 * Returns the SystemLiteral parsed or NULL
3272 */
3273
3274xmlChar *
3275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3276 xmlChar *buf = NULL;
3277 int len = 0;
3278 int size = XML_PARSER_BUFFER_SIZE;
3279 int cur, l;
3280 xmlChar stop;
3281 int state = ctxt->instate;
3282 int count = 0;
3283
3284 SHRINK;
3285 if (RAW == '"') {
3286 NEXT;
3287 stop = '"';
3288 } else if (RAW == '\'') {
3289 NEXT;
3290 stop = '\'';
3291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003293 return(NULL);
3294 }
3295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003297 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003299 return(NULL);
3300 }
3301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3302 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003304 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003305 xmlChar *tmp;
3306
Owen Taylor3473f882001-02-23 17:55:21 +00003307 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003308 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3309 if (tmp == NULL) {
3310 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003312 ctxt->instate = (xmlParserInputState) state;
3313 return(NULL);
3314 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003315 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003316 }
3317 count++;
3318 if (count > 50) {
3319 GROW;
3320 count = 0;
3321 }
3322 COPY_BUF(l,buf,len,cur);
3323 NEXTL(l);
3324 cur = CUR_CHAR(l);
3325 if (cur == 0) {
3326 GROW;
3327 SHRINK;
3328 cur = CUR_CHAR(l);
3329 }
3330 }
3331 buf[len] = 0;
3332 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003333 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003334 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003335 } else {
3336 NEXT;
3337 }
3338 return(buf);
3339}
3340
3341/**
3342 * xmlParsePubidLiteral:
3343 * @ctxt: an XML parser context
3344 *
3345 * parse an XML public literal
3346 *
3347 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3348 *
3349 * Returns the PubidLiteral parsed or NULL.
3350 */
3351
3352xmlChar *
3353xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3354 xmlChar *buf = NULL;
3355 int len = 0;
3356 int size = XML_PARSER_BUFFER_SIZE;
3357 xmlChar cur;
3358 xmlChar stop;
3359 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003360 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003361
3362 SHRINK;
3363 if (RAW == '"') {
3364 NEXT;
3365 stop = '"';
3366 } else if (RAW == '\'') {
3367 NEXT;
3368 stop = '\'';
3369 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003371 return(NULL);
3372 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003373 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003374 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003376 return(NULL);
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003380 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003381 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003382 xmlChar *tmp;
3383
Owen Taylor3473f882001-02-23 17:55:21 +00003384 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3386 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003387 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003388 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003389 return(NULL);
3390 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003391 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003392 }
3393 buf[len++] = cur;
3394 count++;
3395 if (count > 50) {
3396 GROW;
3397 count = 0;
3398 }
3399 NEXT;
3400 cur = CUR;
3401 if (cur == 0) {
3402 GROW;
3403 SHRINK;
3404 cur = CUR;
3405 }
3406 }
3407 buf[len] = 0;
3408 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003409 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003410 } else {
3411 NEXT;
3412 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003413 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003414 return(buf);
3415}
3416
Daniel Veillard48b2f892001-02-25 16:11:03 +00003417void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003418
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing.  An entry is non-zero (equal to its own index) for the bytes
 * that can be skipped as plain character data: TAB and 0x20..0x7F except
 * '&', '<' and ']'.  Everything else, including CR, LF and all bytes
 * >= 0x80, is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80..0xFF (non-ascii): left out, hence zero-initialized */
};
3456
Owen Taylor3473f882001-02-23 17:55:21 +00003457/**
3458 * xmlParseCharData:
3459 * @ctxt: an XML parser context
3460 * @cdata: int indicating whether we are within a CDATA section
3461 *
3462 * parse a CharData section.
3463 * if we are within a CDATA section ']]>' marks an end of section.
3464 *
3465 * The right angle bracket (>) may be represented using the string "&gt;",
3466 * and must, for compatibility, be escaped using "&gt;" or a character
3467 * reference when it appears in the string "]]>" in content, when that
3468 * string is not marking the end of a CDATA section.
3469 *
3470 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3471 */
3472
3473void
3474xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003475 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003477 int line = ctxt->input->line;
3478 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003479 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003480
3481 SHRINK;
3482 GROW;
3483 /*
3484 * Accelerated common case where input don't need to be
3485 * modified before passing it to the handler.
3486 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003487 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003488 in = ctxt->input->cur;
3489 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003490get_more_space:
3491 while (*in == 0x20) in++;
3492 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003493 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003494 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003495 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003496 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003497 goto get_more_space;
3498 }
3499 if (*in == '<') {
3500 nbchar = in - ctxt->input->cur;
3501 if (nbchar > 0) {
3502 const xmlChar *tmp = ctxt->input->cur;
3503 ctxt->input->cur = in;
3504
Daniel Veillard34099b42004-11-04 17:34:35 +00003505 if ((ctxt->sax != NULL) &&
3506 (ctxt->sax->ignorableWhitespace !=
3507 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003508 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003509 if (ctxt->sax->ignorableWhitespace != NULL)
3510 ctxt->sax->ignorableWhitespace(ctxt->userData,
3511 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003512 } else {
3513 if (ctxt->sax->characters != NULL)
3514 ctxt->sax->characters(ctxt->userData,
3515 tmp, nbchar);
3516 if (*ctxt->space == -1)
3517 *ctxt->space = -2;
3518 }
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 } else if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003521 ctxt->sax->characters(ctxt->userData,
3522 tmp, nbchar);
3523 }
3524 }
3525 return;
3526 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003527
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003528get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003529 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003530 while (test_char_data[*in]) {
3531 in++;
3532 ccol++;
3533 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003534 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003535 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003536 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003537 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003538 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003539 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003540 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003541 }
3542 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003543 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003544 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003545 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003546 return;
3547 }
3548 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003549 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003550 goto get_more;
3551 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003552 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003553 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003554 if ((ctxt->sax != NULL) &&
3555 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003556 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003557 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003558 const xmlChar *tmp = ctxt->input->cur;
3559 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003560
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003561 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003562 if (ctxt->sax->ignorableWhitespace != NULL)
3563 ctxt->sax->ignorableWhitespace(ctxt->userData,
3564 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003565 } else {
3566 if (ctxt->sax->characters != NULL)
3567 ctxt->sax->characters(ctxt->userData,
3568 tmp, nbchar);
3569 if (*ctxt->space == -1)
3570 *ctxt->space = -2;
3571 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003572 line = ctxt->input->line;
3573 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003574 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003575 if (ctxt->sax->characters != NULL)
3576 ctxt->sax->characters(ctxt->userData,
3577 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003578 line = ctxt->input->line;
3579 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003580 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003581 }
3582 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003583 if (*in == 0xD) {
3584 in++;
3585 if (*in == 0xA) {
3586 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003587 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003588 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003589 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003590 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003591 in--;
3592 }
3593 if (*in == '<') {
3594 return;
3595 }
3596 if (*in == '&') {
3597 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003598 }
3599 SHRINK;
3600 GROW;
3601 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003602 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003603 nbchar = 0;
3604 }
Daniel Veillard50582112001-03-26 22:52:16 +00003605 ctxt->input->line = line;
3606 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003607 xmlParseCharDataComplex(ctxt, cdata);
3608}
3609
Daniel Veillard01c13b52002-12-10 15:19:08 +00003610/**
3611 * xmlParseCharDataComplex:
3612 * @ctxt: an XML parser context
3613 * @cdata: int indicating whether we are within a CDATA section
3614 *
3615 * parse a CharData section.this is the fallback function
3616 * of xmlParseCharData() when the parsing requires handling
3617 * of non-ASCII characters.
3618 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003619void
3620xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003621 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3622 int nbchar = 0;
3623 int cur, l;
3624 int count = 0;
3625
3626 SHRINK;
3627 GROW;
3628 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003629 while ((cur != '<') && /* checked */
3630 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003631 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003632 if ((cur == ']') && (NXT(1) == ']') &&
3633 (NXT(2) == '>')) {
3634 if (cdata) break;
3635 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003636 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003637 }
3638 }
3639 COPY_BUF(l,buf,nbchar,cur);
3640 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003641 buf[nbchar] = 0;
3642
Owen Taylor3473f882001-02-23 17:55:21 +00003643 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003644 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003645 */
3646 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003647 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003648 if (ctxt->sax->ignorableWhitespace != NULL)
3649 ctxt->sax->ignorableWhitespace(ctxt->userData,
3650 buf, nbchar);
3651 } else {
3652 if (ctxt->sax->characters != NULL)
3653 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003654 if ((ctxt->sax->characters !=
3655 ctxt->sax->ignorableWhitespace) &&
3656 (*ctxt->space == -1))
3657 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003658 }
3659 }
3660 nbchar = 0;
3661 }
3662 count++;
3663 if (count > 50) {
3664 GROW;
3665 count = 0;
3666 }
3667 NEXTL(l);
3668 cur = CUR_CHAR(l);
3669 }
3670 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003671 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003672 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003673 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003674 */
3675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003676 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003677 if (ctxt->sax->ignorableWhitespace != NULL)
3678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3679 } else {
3680 if (ctxt->sax->characters != NULL)
3681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003682 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
3683 (*ctxt->space == -1))
3684 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00003685 }
3686 }
3687 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00003688 if ((cur != 0) && (!IS_CHAR(cur))) {
3689 /* Generate the error and skip the offending character */
3690 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
3691 "PCDATA invalid Char value %d\n",
3692 cur);
3693 NEXTL(l);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695}
3696
3697/**
3698 * xmlParseExternalID:
3699 * @ctxt: an XML parser context
3700 * @publicID: a xmlChar** receiving PubidLiteral
3701 * @strict: indicate whether we should restrict parsing to only
3702 * production [75], see NOTE below
3703 *
3704 * Parse an External ID or a Public ID
3705 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003706 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003707 * 'PUBLIC' S PubidLiteral S SystemLiteral
3708 *
3709 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3710 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3711 *
3712 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3713 *
3714 * Returns the function returns SystemLiteral and in the second
3715 * case publicID receives PubidLiteral, is strict is off
3716 * it is possible to return NULL and have publicID set.
3717 */
3718
3719xmlChar *
3720xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3721 xmlChar *URI = NULL;
3722
3723 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003724
3725 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003726 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003727 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3730 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003731 }
3732 SKIP_BLANKS;
3733 URI = xmlParseSystemLiteral(ctxt);
3734 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003735 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003737 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003741 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003742 }
3743 SKIP_BLANKS;
3744 *publicID = xmlParsePubidLiteral(ctxt);
3745 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003746 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 if (strict) {
3749 /*
3750 * We don't handle [83] so "S SystemLiteral" is required.
3751 */
William M. Brack76e95df2003-10-18 16:20:14 +00003752 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003754 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003755 }
3756 } else {
3757 /*
3758 * We handle [83] so we return immediately, if
3759 * "S SystemLiteral" is not detected. From a purely parsing
3760 * point of view that's a nice mess.
3761 */
3762 const xmlChar *ptr;
3763 GROW;
3764
3765 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003766 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767
William M. Brack76e95df2003-10-18 16:20:14 +00003768 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003769 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3770 }
3771 SKIP_BLANKS;
3772 URI = xmlParseSystemLiteral(ctxt);
3773 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003774 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
3776 }
3777 return(URI);
3778}
3779
3780/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003781 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003782 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003783 * @buf: the already parsed part of the buffer
3784 * @len: number of bytes filles in the buffer
3785 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003786 *
3787 * Skip an XML (SGML) comment <!-- .... -->
3788 * The spec says that "For compatibility, the string "--" (double-hyphen)
3789 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003790 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003791 *
3792 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3793 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003794static void
3795xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003796 int q, ql;
3797 int r, rl;
3798 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 xmlParserInputPtr input = ctxt->input;
3800 int count = 0;
3801
Owen Taylor3473f882001-02-23 17:55:21 +00003802 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003803 len = 0;
3804 size = XML_PARSER_BUFFER_SIZE;
3805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3806 if (buf == NULL) {
3807 xmlErrMemory(ctxt, NULL);
3808 return;
3809 }
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00003811 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00003812 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003813 if (q == 0)
3814 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003815 NEXTL(ql);
3816 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003817 if (r == 0)
3818 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 NEXTL(rl);
3820 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003821 if (cur == 0)
3822 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003823 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003824 ((cur != '>') ||
3825 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003826 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003827 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003828 }
3829 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003830 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003832 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3833 if (new_buf == NULL) {
3834 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003835 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return;
3837 }
William M. Bracka3215c72004-07-31 16:24:01 +00003838 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003839 }
3840 COPY_BUF(ql,buf,len,q);
3841 q = r;
3842 ql = rl;
3843 r = cur;
3844 rl = l;
3845
3846 count++;
3847 if (count > 50) {
3848 GROW;
3849 count = 0;
3850 }
3851 NEXTL(l);
3852 cur = CUR_CHAR(l);
3853 if (cur == 0) {
3854 SHRINK;
3855 GROW;
3856 cur = CUR_CHAR(l);
3857 }
3858 }
3859 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003860 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003862 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 xmlFree(buf);
3864 } else {
3865 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003866 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3867 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 NEXT;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3871 (!ctxt->disableSAX))
3872 ctxt->sax->comment(ctxt->userData, buf);
3873 xmlFree(buf);
3874 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003875 return;
3876not_terminated:
3877 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3878 "Comment not terminated\n", NULL);
3879 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003880}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003881/**
3882 * xmlParseComment:
3883 * @ctxt: an XML parser context
3884 *
3885 * Skip an XML (SGML) comment <!-- .... -->
3886 * The spec says that "For compatibility, the string "--" (double-hyphen)
3887 * must not occur within comments. "
3888 *
3889 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3890 */
3891void
3892xmlParseComment(xmlParserCtxtPtr ctxt) {
3893 xmlChar *buf = NULL;
3894 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003895 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003896 xmlParserInputState state;
3897 const xmlChar *in;
3898 int nbchar = 0, ccol;
3899
3900 /*
3901 * Check that there is a comment right here.
3902 */
3903 if ((RAW != '<') || (NXT(1) != '!') ||
3904 (NXT(2) != '-') || (NXT(3) != '-')) return;
3905
3906 state = ctxt->instate;
3907 ctxt->instate = XML_PARSER_COMMENT;
3908 SKIP(4);
3909 SHRINK;
3910 GROW;
3911
3912 /*
3913 * Accelerated common case where input don't need to be
3914 * modified before passing it to the handler.
3915 */
3916 in = ctxt->input->cur;
3917 do {
3918 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003919 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003920 ctxt->input->line++; ctxt->input->col = 1;
3921 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003922 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003923 }
3924get_more:
3925 ccol = ctxt->input->col;
3926 while (((*in > '-') && (*in <= 0x7F)) ||
3927 ((*in >= 0x20) && (*in < '-')) ||
3928 (*in == 0x09)) {
3929 in++;
3930 ccol++;
3931 }
3932 ctxt->input->col = ccol;
3933 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003934 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003935 ctxt->input->line++; ctxt->input->col = 1;
3936 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003937 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00003938 goto get_more;
3939 }
3940 nbchar = in - ctxt->input->cur;
3941 /*
3942 * save current set of data
3943 */
3944 if (nbchar > 0) {
3945 if ((ctxt->sax != NULL) &&
3946 (ctxt->sax->comment != NULL)) {
3947 if (buf == NULL) {
3948 if ((*in == '-') && (in[1] == '-'))
3949 size = nbchar + 1;
3950 else
3951 size = XML_PARSER_BUFFER_SIZE + nbchar;
3952 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3953 if (buf == NULL) {
3954 xmlErrMemory(ctxt, NULL);
3955 ctxt->instate = state;
3956 return;
3957 }
3958 len = 0;
3959 } else if (len + nbchar + 1 >= size) {
3960 xmlChar *new_buf;
3961 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3962 new_buf = (xmlChar *) xmlRealloc(buf,
3963 size * sizeof(xmlChar));
3964 if (new_buf == NULL) {
3965 xmlFree (buf);
3966 xmlErrMemory(ctxt, NULL);
3967 ctxt->instate = state;
3968 return;
3969 }
3970 buf = new_buf;
3971 }
3972 memcpy(&buf[len], ctxt->input->cur, nbchar);
3973 len += nbchar;
3974 buf[len] = 0;
3975 }
3976 }
3977 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00003978 if (*in == 0xA) {
3979 in++;
3980 ctxt->input->line++; ctxt->input->col = 1;
3981 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00003982 if (*in == 0xD) {
3983 in++;
3984 if (*in == 0xA) {
3985 ctxt->input->cur = in;
3986 in++;
3987 ctxt->input->line++; ctxt->input->col = 1;
3988 continue; /* while */
3989 }
3990 in--;
3991 }
3992 SHRINK;
3993 GROW;
3994 in = ctxt->input->cur;
3995 if (*in == '-') {
3996 if (in[1] == '-') {
3997 if (in[2] == '>') {
3998 SKIP(3);
3999 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4000 (!ctxt->disableSAX)) {
4001 if (buf != NULL)
4002 ctxt->sax->comment(ctxt->userData, buf);
4003 else
4004 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4005 }
4006 if (buf != NULL)
4007 xmlFree(buf);
4008 ctxt->instate = state;
4009 return;
4010 }
4011 if (buf != NULL)
4012 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4013 "Comment not terminated \n<!--%.50s\n",
4014 buf);
4015 else
4016 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4017 "Comment not terminated \n", NULL);
4018 in++;
4019 ctxt->input->col++;
4020 }
4021 in++;
4022 ctxt->input->col++;
4023 goto get_more;
4024 }
4025 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4026 xmlParseCommentComplex(ctxt, buf, len, size);
4027 ctxt->instate = state;
4028 return;
4029}
4030
Owen Taylor3473f882001-02-23 17:55:21 +00004031
4032/**
4033 * xmlParsePITarget:
4034 * @ctxt: an XML parser context
4035 *
4036 * parse the name of a PI
4037 *
4038 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4039 *
4040 * Returns the PITarget name or NULL
4041 */
4042
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004043const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004044xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004045 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004046
4047 name = xmlParseName(ctxt);
4048 if ((name != NULL) &&
4049 ((name[0] == 'x') || (name[0] == 'X')) &&
4050 ((name[1] == 'm') || (name[1] == 'M')) &&
4051 ((name[2] == 'l') || (name[2] == 'L'))) {
4052 int i;
4053 if ((name[0] == 'x') && (name[1] == 'm') &&
4054 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004055 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004056 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004057 return(name);
4058 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004059 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004060 return(name);
4061 }
4062 for (i = 0;;i++) {
4063 if (xmlW3CPIs[i] == NULL) break;
4064 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4065 return(name);
4066 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004067 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4068 "xmlParsePITarget: invalid name prefix 'xml'\n",
4069 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
4071 return(name);
4072}
4073
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected content is the pseudo attribute 'catalog', an '=' sign
 * and a quoted value, with optional blanks around each of them and
 * nothing else after the value. A missing '=' makes the function return
 * silently, any other syntax problem raises a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the value runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4135
Owen Taylor3473f882001-02-23 17:55:21 +00004136/**
4137 * xmlParsePI:
4138 * @ctxt: an XML parser context
4139 *
4140 * parse an XML Processing Instruction.
4141 *
4142 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4143 *
4144 * The processing is transfered to SAX once parsed.
4145 */
4146
4147void
4148xmlParsePI(xmlParserCtxtPtr ctxt) {
4149 xmlChar *buf = NULL;
4150 int len = 0;
4151 int size = XML_PARSER_BUFFER_SIZE;
4152 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004153 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004154 xmlParserInputState state;
4155 int count = 0;
4156
4157 if ((RAW == '<') && (NXT(1) == '?')) {
4158 xmlParserInputPtr input = ctxt->input;
4159 state = ctxt->instate;
4160 ctxt->instate = XML_PARSER_PI;
4161 /*
4162 * this is a Processing Instruction.
4163 */
4164 SKIP(2);
4165 SHRINK;
4166
4167 /*
4168 * Parse the target name and check for special support like
4169 * namespace.
4170 */
4171 target = xmlParsePITarget(ctxt);
4172 if (target != NULL) {
4173 if ((RAW == '?') && (NXT(1) == '>')) {
4174 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004175 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4176 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004177 }
4178 SKIP(2);
4179
4180 /*
4181 * SAX: PI detected.
4182 */
4183 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4184 (ctxt->sax->processingInstruction != NULL))
4185 ctxt->sax->processingInstruction(ctxt->userData,
4186 target, NULL);
4187 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 return;
4189 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004190 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004191 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004192 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004193 ctxt->instate = state;
4194 return;
4195 }
4196 cur = CUR;
4197 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004198 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4199 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004200 }
4201 SKIP_BLANKS;
4202 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004203 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004204 ((cur != '?') || (NXT(1) != '>'))) {
4205 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004206 xmlChar *tmp;
4207
Owen Taylor3473f882001-02-23 17:55:21 +00004208 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004209 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4210 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004212 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004213 ctxt->instate = state;
4214 return;
4215 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004216 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004217 }
4218 count++;
4219 if (count > 50) {
4220 GROW;
4221 count = 0;
4222 }
4223 COPY_BUF(l,buf,len,cur);
4224 NEXTL(l);
4225 cur = CUR_CHAR(l);
4226 if (cur == 0) {
4227 SHRINK;
4228 GROW;
4229 cur = CUR_CHAR(l);
4230 }
4231 }
4232 buf[len] = 0;
4233 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004234 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4235 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 } else {
4237 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004238 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4239 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004240 }
4241 SKIP(2);
4242
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004243#ifdef LIBXML_CATALOG_ENABLED
4244 if (((state == XML_PARSER_MISC) ||
4245 (state == XML_PARSER_START)) &&
4246 (xmlStrEqual(target, XML_CATALOG_PI))) {
4247 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4248 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4249 (allow == XML_CATA_ALLOW_ALL))
4250 xmlParseCatalogPI(ctxt, buf);
4251 }
4252#endif
4253
4254
Owen Taylor3473f882001-02-23 17:55:21 +00004255 /*
4256 * SAX: PI detected.
4257 */
4258 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4259 (ctxt->sax->processingInstruction != NULL))
4260 ctxt->sax->processingInstruction(ctxt->userData,
4261 target, buf);
4262 }
4263 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004265 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 ctxt->instate = state;
4268 }
4269}
4270
4271/**
4272 * xmlParseNotationDecl:
4273 * @ctxt: an XML parser context
4274 *
4275 * parse a notation declaration
4276 *
4277 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4278 *
4279 * Hence there is actually 3 choices:
4280 * 'PUBLIC' S PubidLiteral
4281 * 'PUBLIC' S PubidLiteral S SystemLiteral
4282 * and 'SYSTEM' S SystemLiteral
4283 *
4284 * See the NOTE on xmlParseExternalID().
4285 */
4286
4287void
4288xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004289 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004290 xmlChar *Pubid;
4291 xmlChar *Systemid;
4292
Daniel Veillarda07050d2003-10-19 14:46:32 +00004293 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004294 xmlParserInputPtr input = ctxt->input;
4295 SHRINK;
4296 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004297 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004298 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4299 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004300 return;
4301 }
4302 SKIP_BLANKS;
4303
Daniel Veillard76d66f42001-05-16 21:05:17 +00004304 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004305 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004306 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004307 return;
4308 }
William M. Brack76e95df2003-10-18 16:20:14 +00004309 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004311 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004312 return;
4313 }
4314 SKIP_BLANKS;
4315
4316 /*
4317 * Parse the IDs.
4318 */
4319 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4320 SKIP_BLANKS;
4321
4322 if (RAW == '>') {
4323 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004324 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4325 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004326 }
4327 NEXT;
4328 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4329 (ctxt->sax->notationDecl != NULL))
4330 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4331 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004332 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004333 }
Owen Taylor3473f882001-02-23 17:55:21 +00004334 if (Systemid != NULL) xmlFree(Systemid);
4335 if (Pubid != NULL) xmlFree(Pubid);
4336 }
4337}
4338
4339/**
4340 * xmlParseEntityDecl:
4341 * @ctxt: an XML parser context
4342 *
4343 * parse <!ENTITY declarations
4344 *
4345 * [70] EntityDecl ::= GEDecl | PEDecl
4346 *
4347 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4348 *
4349 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4350 *
4351 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4352 *
4353 * [74] PEDef ::= EntityValue | ExternalID
4354 *
4355 * [76] NDataDecl ::= S 'NDATA' S Name
4356 *
4357 * [ VC: Notation Declared ]
4358 * The Name must match the declared name of a notation.
4359 */
4360
4361void
4362xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004363 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 xmlChar *value = NULL;
4365 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004366 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004367 int isParameter = 0;
4368 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004369 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004370
Daniel Veillard4c778d82005-01-23 17:37:44 +00004371 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004372 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004373 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004374 SHRINK;
4375 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004376 skipped = SKIP_BLANKS;
4377 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004378 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4379 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004380 }
Owen Taylor3473f882001-02-23 17:55:21 +00004381
4382 if (RAW == '%') {
4383 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004384 skipped = SKIP_BLANKS;
4385 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4387 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004388 }
Owen Taylor3473f882001-02-23 17:55:21 +00004389 isParameter = 1;
4390 }
4391
Daniel Veillard76d66f42001-05-16 21:05:17 +00004392 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004393 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004394 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4395 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004396 return;
4397 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004398 skipped = SKIP_BLANKS;
4399 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004400 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4401 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004402 }
Owen Taylor3473f882001-02-23 17:55:21 +00004403
Daniel Veillardf5582f12002-06-11 10:08:16 +00004404 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004405 /*
4406 * handle the various case of definitions...
4407 */
4408 if (isParameter) {
4409 if ((RAW == '"') || (RAW == '\'')) {
4410 value = xmlParseEntityValue(ctxt, &orig);
4411 if (value) {
4412 if ((ctxt->sax != NULL) &&
4413 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4414 ctxt->sax->entityDecl(ctxt->userData, name,
4415 XML_INTERNAL_PARAMETER_ENTITY,
4416 NULL, NULL, value);
4417 }
4418 } else {
4419 URI = xmlParseExternalID(ctxt, &literal, 1);
4420 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004421 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004422 }
4423 if (URI) {
4424 xmlURIPtr uri;
4425
4426 uri = xmlParseURI((const char *) URI);
4427 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004428 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4429 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004430 /*
4431 * This really ought to be a well formedness error
4432 * but the XML Core WG decided otherwise c.f. issue
4433 * E26 of the XML erratas.
4434 */
Owen Taylor3473f882001-02-23 17:55:21 +00004435 } else {
4436 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004437 /*
4438 * Okay this is foolish to block those but not
4439 * invalid URIs.
4440 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004441 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004442 } else {
4443 if ((ctxt->sax != NULL) &&
4444 (!ctxt->disableSAX) &&
4445 (ctxt->sax->entityDecl != NULL))
4446 ctxt->sax->entityDecl(ctxt->userData, name,
4447 XML_EXTERNAL_PARAMETER_ENTITY,
4448 literal, URI, NULL);
4449 }
4450 xmlFreeURI(uri);
4451 }
4452 }
4453 }
4454 } else {
4455 if ((RAW == '"') || (RAW == '\'')) {
4456 value = xmlParseEntityValue(ctxt, &orig);
4457 if ((ctxt->sax != NULL) &&
4458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4459 ctxt->sax->entityDecl(ctxt->userData, name,
4460 XML_INTERNAL_GENERAL_ENTITY,
4461 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004462 /*
4463 * For expat compatibility in SAX mode.
4464 */
4465 if ((ctxt->myDoc == NULL) ||
4466 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4467 if (ctxt->myDoc == NULL) {
4468 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4469 }
4470 if (ctxt->myDoc->intSubset == NULL)
4471 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4472 BAD_CAST "fake", NULL, NULL);
4473
Daniel Veillard1af9a412003-08-20 22:54:39 +00004474 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4475 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004476 }
Owen Taylor3473f882001-02-23 17:55:21 +00004477 } else {
4478 URI = xmlParseExternalID(ctxt, &literal, 1);
4479 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004480 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004481 }
4482 if (URI) {
4483 xmlURIPtr uri;
4484
4485 uri = xmlParseURI((const char *)URI);
4486 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004487 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4488 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004489 /*
4490 * This really ought to be a well formedness error
4491 * but the XML Core WG decided otherwise c.f. issue
4492 * E26 of the XML erratas.
4493 */
Owen Taylor3473f882001-02-23 17:55:21 +00004494 } else {
4495 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004496 /*
4497 * Okay this is foolish to block those but not
4498 * invalid URIs.
4499 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004500 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004501 }
4502 xmlFreeURI(uri);
4503 }
4504 }
William M. Brack76e95df2003-10-18 16:20:14 +00004505 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4507 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004508 }
4509 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004510 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004511 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004512 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004513 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4514 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004515 }
4516 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004517 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004518 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4519 (ctxt->sax->unparsedEntityDecl != NULL))
4520 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4521 literal, URI, ndata);
4522 } else {
4523 if ((ctxt->sax != NULL) &&
4524 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4525 ctxt->sax->entityDecl(ctxt->userData, name,
4526 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4527 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004528 /*
4529 * For expat compatibility in SAX mode.
4530 * assuming the entity repalcement was asked for
4531 */
4532 if ((ctxt->replaceEntities != 0) &&
4533 ((ctxt->myDoc == NULL) ||
4534 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4535 if (ctxt->myDoc == NULL) {
4536 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4537 }
4538
4539 if (ctxt->myDoc->intSubset == NULL)
4540 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4541 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004542 xmlSAX2EntityDecl(ctxt, name,
4543 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4544 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004545 }
Owen Taylor3473f882001-02-23 17:55:21 +00004546 }
4547 }
4548 }
4549 SKIP_BLANKS;
4550 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004551 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004552 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004553 } else {
4554 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004555 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4556 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004557 }
4558 NEXT;
4559 }
4560 if (orig != NULL) {
4561 /*
4562 * Ugly mechanism to save the raw entity value.
4563 */
4564 xmlEntityPtr cur = NULL;
4565
4566 if (isParameter) {
4567 if ((ctxt->sax != NULL) &&
4568 (ctxt->sax->getParameterEntity != NULL))
4569 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4570 } else {
4571 if ((ctxt->sax != NULL) &&
4572 (ctxt->sax->getEntity != NULL))
4573 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004574 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004575 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004576 }
Owen Taylor3473f882001-02-23 17:55:21 +00004577 }
4578 if (cur != NULL) {
4579 if (cur->orig != NULL)
4580 xmlFree(orig);
4581 else
4582 cur->orig = orig;
4583 } else
4584 xmlFree(orig);
4585 }
Owen Taylor3473f882001-02-23 17:55:21 +00004586 if (value != NULL) xmlFree(value);
4587 if (URI != NULL) xmlFree(URI);
4588 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004589 }
4590}
4591
4592/**
4593 * xmlParseDefaultDecl:
4594 * @ctxt: an XML parser context
4595 * @value: Receive a possible fixed default value for the attribute
4596 *
4597 * Parse an attribute default declaration
4598 *
4599 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4600 *
4601 * [ VC: Required Attribute ]
4602 * if the default declaration is the keyword #REQUIRED, then the
4603 * attribute must be specified for all elements of the type in the
4604 * attribute-list declaration.
4605 *
4606 * [ VC: Attribute Default Legal ]
4607 * The declared default value must meet the lexical constraints of
4608 * the declared attribute type c.f. xmlValidateAttributeDecl()
4609 *
4610 * [ VC: Fixed Attribute Default ]
4611 * if an attribute has a default value declared with the #FIXED
4612 * keyword, instances of that attribute must match the default value.
4613 *
4614 * [ WFC: No < in Attribute Values ]
4615 * handled in xmlParseAttValue()
4616 *
4617 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4618 * or XML_ATTRIBUTE_FIXED.
4619 */
4620
4621int
4622xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4623 int val;
4624 xmlChar *ret;
4625
4626 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004627 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP(9);
4629 return(XML_ATTRIBUTE_REQUIRED);
4630 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004631 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004632 SKIP(8);
4633 return(XML_ATTRIBUTE_IMPLIED);
4634 }
4635 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004636 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004637 SKIP(6);
4638 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004639 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4641 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 SKIP_BLANKS;
4644 }
4645 ret = xmlParseAttValue(ctxt);
4646 ctxt->instate = XML_PARSER_DTD;
4647 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004648 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004649 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004650 } else
4651 *value = ret;
4652 return(val);
4653}
4654
4655/**
4656 * xmlParseNotationType:
4657 * @ctxt: an XML parser context
4658 *
4659 * parse an Notation attribute type.
4660 *
4661 * Note: the leading 'NOTATION' S part has already being parsed...
4662 *
4663 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4664 *
4665 * [ VC: Notation Attributes ]
4666 * Values of this type must match one of the notation names included
4667 * in the declaration; all notation names in the declaration must be declared.
4668 *
4669 * Returns: the notation attribute tree built while parsing
4670 */
4671
4672xmlEnumerationPtr
4673xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004674 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004675 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4676
4677 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004678 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004679 return(NULL);
4680 }
4681 SHRINK;
4682 do {
4683 NEXT;
4684 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004685 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004686 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004687 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4688 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004689 return(ret);
4690 }
4691 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004692 if (cur == NULL) return(ret);
4693 if (last == NULL) ret = last = cur;
4694 else {
4695 last->next = cur;
4696 last = cur;
4697 }
4698 SKIP_BLANKS;
4699 } while (RAW == '|');
4700 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004702 if ((last != NULL) && (last != ret))
4703 xmlFreeEnumeration(last);
4704 return(ret);
4705 }
4706 NEXT;
4707 return(ret);
4708}
4709
4710/**
4711 * xmlParseEnumerationType:
4712 * @ctxt: an XML parser context
4713 *
4714 * parse an Enumeration attribute type.
4715 *
4716 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4717 *
4718 * [ VC: Enumeration ]
4719 * Values of this type must match one of the Nmtoken tokens in
4720 * the declaration
4721 *
4722 * Returns: the enumeration attribute tree built while parsing
4723 */
4724
4725xmlEnumerationPtr
4726xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4727 xmlChar *name;
4728 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4729
4730 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004731 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004732 return(NULL);
4733 }
4734 SHRINK;
4735 do {
4736 NEXT;
4737 SKIP_BLANKS;
4738 name = xmlParseNmtoken(ctxt);
4739 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004740 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004741 return(ret);
4742 }
4743 cur = xmlCreateEnumeration(name);
4744 xmlFree(name);
4745 if (cur == NULL) return(ret);
4746 if (last == NULL) ret = last = cur;
4747 else {
4748 last->next = cur;
4749 last = cur;
4750 }
4751 SKIP_BLANKS;
4752 } while (RAW == '|');
4753 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004754 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004755 return(ret);
4756 }
4757 NEXT;
4758 return(ret);
4759}
4760
4761/**
4762 * xmlParseEnumeratedType:
4763 * @ctxt: an XML parser context
4764 * @tree: the enumeration tree built while parsing
4765 *
4766 * parse an Enumerated attribute type.
4767 *
4768 * [57] EnumeratedType ::= NotationType | Enumeration
4769 *
4770 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4771 *
4772 *
4773 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4774 */
4775
4776int
4777xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004778 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004779 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004780 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4782 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004783 return(0);
4784 }
4785 SKIP_BLANKS;
4786 *tree = xmlParseNotationType(ctxt);
4787 if (*tree == NULL) return(0);
4788 return(XML_ATTRIBUTE_NOTATION);
4789 }
4790 *tree = xmlParseEnumerationType(ctxt);
4791 if (*tree == NULL) return(0);
4792 return(XML_ATTRIBUTE_ENUMERATION);
4793}
4794
4795/**
4796 * xmlParseAttributeType:
4797 * @ctxt: an XML parser context
4798 * @tree: the enumeration tree built while parsing
4799 *
4800 * parse the Attribute list def for an element
4801 *
4802 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4803 *
4804 * [55] StringType ::= 'CDATA'
4805 *
4806 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4807 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4808 *
4809 * Validity constraints for attribute values syntax are checked in
4810 * xmlValidateAttributeValue()
4811 *
4812 * [ VC: ID ]
4813 * Values of type ID must match the Name production. A name must not
4814 * appear more than once in an XML document as a value of this type;
4815 * i.e., ID values must uniquely identify the elements which bear them.
4816 *
4817 * [ VC: One ID per Element Type ]
4818 * No element type may have more than one ID attribute specified.
4819 *
4820 * [ VC: ID Attribute Default ]
4821 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4822 *
4823 * [ VC: IDREF ]
4824 * Values of type IDREF must match the Name production, and values
4825 * of type IDREFS must match Names; each IDREF Name must match the value
4826 * of an ID attribute on some element in the XML document; i.e. IDREF
4827 * values must match the value of some ID attribute.
4828 *
4829 * [ VC: Entity Name ]
4830 * Values of type ENTITY must match the Name production, values
4831 * of type ENTITIES must match Names; each Entity Name must match the
4832 * name of an unparsed entity declared in the DTD.
4833 *
4834 * [ VC: Name Token ]
4835 * Values of type NMTOKEN must match the Nmtoken production; values
4836 * of type NMTOKENS must match Nmtokens.
4837 *
4838 * Returns the attribute type
4839 */
4840int
4841xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4842 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004843 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004844 SKIP(5);
4845 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004846 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004847 SKIP(6);
4848 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004849 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004850 SKIP(5);
4851 return(XML_ATTRIBUTE_IDREF);
4852 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4853 SKIP(2);
4854 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004855 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004856 SKIP(6);
4857 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004858 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004859 SKIP(8);
4860 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004861 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004862 SKIP(8);
4863 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004864 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004865 SKIP(7);
4866 return(XML_ATTRIBUTE_NMTOKEN);
4867 }
4868 return(xmlParseEnumeratedType(ctxt, tree));
4869}
4870
4871/**
4872 * xmlParseAttributeListDecl:
4873 * @ctxt: an XML parser context
4874 *
4875 * : parse the Attribute list def for an element
4876 *
4877 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4878 *
4879 * [53] AttDef ::= S Name S AttType S DefaultDecl
4880 *
4881 */
4882void
4883xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004884 const xmlChar *elemName;
4885 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004886 xmlEnumerationPtr tree;
4887
Daniel Veillarda07050d2003-10-19 14:46:32 +00004888 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004889 xmlParserInputPtr input = ctxt->input;
4890
4891 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004892 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004893 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004894 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004895 }
4896 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004897 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004898 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4900 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004901 return;
4902 }
4903 SKIP_BLANKS;
4904 GROW;
4905 while (RAW != '>') {
4906 const xmlChar *check = CUR_PTR;
4907 int type;
4908 int def;
4909 xmlChar *defaultValue = NULL;
4910
4911 GROW;
4912 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004913 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4916 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 break;
4918 }
4919 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004920 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004921 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004922 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004923 break;
4924 }
4925 SKIP_BLANKS;
4926
4927 type = xmlParseAttributeType(ctxt, &tree);
4928 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004929 break;
4930 }
4931
4932 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004933 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004934 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4935 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004936 if (tree != NULL)
4937 xmlFreeEnumeration(tree);
4938 break;
4939 }
4940 SKIP_BLANKS;
4941
4942 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4943 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004944 if (defaultValue != NULL)
4945 xmlFree(defaultValue);
4946 if (tree != NULL)
4947 xmlFreeEnumeration(tree);
4948 break;
4949 }
4950
4951 GROW;
4952 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004953 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004954 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004955 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (defaultValue != NULL)
4957 xmlFree(defaultValue);
4958 if (tree != NULL)
4959 xmlFreeEnumeration(tree);
4960 break;
4961 }
4962 SKIP_BLANKS;
4963 }
4964 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004965 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4966 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 if (defaultValue != NULL)
4968 xmlFree(defaultValue);
4969 if (tree != NULL)
4970 xmlFreeEnumeration(tree);
4971 break;
4972 }
4973 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4974 (ctxt->sax->attributeDecl != NULL))
4975 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4976 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004977 else if (tree != NULL)
4978 xmlFreeEnumeration(tree);
4979
4980 if ((ctxt->sax2) && (defaultValue != NULL) &&
4981 (def != XML_ATTRIBUTE_IMPLIED) &&
4982 (def != XML_ATTRIBUTE_REQUIRED)) {
4983 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4984 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004985 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4986 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4987 }
Owen Taylor3473f882001-02-23 17:55:21 +00004988 if (defaultValue != NULL)
4989 xmlFree(defaultValue);
4990 GROW;
4991 }
4992 if (RAW == '>') {
4993 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004994 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4995 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004996 }
4997 NEXT;
4998 }
Owen Taylor3473f882001-02-23 17:55:21 +00004999 }
5000}
5001
5002/**
5003 * xmlParseElementMixedContentDecl:
5004 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005005 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005006 *
5007 * parse the declaration for a Mixed Element content
5008 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5009 *
5010 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5011 * '(' S? '#PCDATA' S? ')'
5012 *
5013 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5014 *
5015 * [ VC: No Duplicate Types ]
5016 * The same name must not appear more than once in a single
5017 * mixed-content declaration.
5018 *
5019 * returns: the list of the xmlElementContentPtr describing the element choices
5020 */
5021xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005022xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005023 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005024 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005025
5026 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005027 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005028 SKIP(7);
5029 SKIP_BLANKS;
5030 SHRINK;
5031 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005032 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005033 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5034"Element content declaration doesn't start and stop in the same entity\n",
5035 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005036 }
Owen Taylor3473f882001-02-23 17:55:21 +00005037 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005038 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005039 if (RAW == '*') {
5040 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5041 NEXT;
5042 }
5043 return(ret);
5044 }
5045 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005046 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005047 if (ret == NULL) return(NULL);
5048 }
5049 while (RAW == '|') {
5050 NEXT;
5051 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005052 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005053 if (ret == NULL) return(NULL);
5054 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005055 if (cur != NULL)
5056 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005057 cur = ret;
5058 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005059 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005060 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005061 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005062 if (n->c1 != NULL)
5063 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005064 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005065 if (n != NULL)
5066 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005067 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005068 }
5069 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005070 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005071 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005072 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005073 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005074 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005075 return(NULL);
5076 }
5077 SKIP_BLANKS;
5078 GROW;
5079 }
5080 if ((RAW == ')') && (NXT(1) == '*')) {
5081 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005082 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005083 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005084 if (cur->c2 != NULL)
5085 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005086 }
5087 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005088 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005089 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5090"Element content declaration doesn't start and stop in the same entity\n",
5091 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005092 }
Owen Taylor3473f882001-02-23 17:55:21 +00005093 SKIP(2);
5094 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005095 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005096 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 return(NULL);
5098 }
5099
5100 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005101 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
5103 return(ret);
5104}
5105
5106/**
5107 * xmlParseElementChildrenContentDecl:
5108 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005109 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005110 *
5111 * parse the declaration for a Mixed Element content
5112 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5113 *
5114 *
5115 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5116 *
5117 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5118 *
5119 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5120 *
5121 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5122 *
5123 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5124 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005125 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005126 * opening or closing parentheses in a choice, seq, or Mixed
5127 * construct is contained in the replacement text for a parameter
5128 * entity, both must be contained in the same replacement text. For
5129 * interoperability, if a parameter-entity reference appears in a
5130 * choice, seq, or Mixed construct, its replacement text should not
5131 * be empty, and neither the first nor last non-blank character of
5132 * the replacement text should be a connector (| or ,).
5133 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005134 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005135 * hierarchy.
5136 */
5137xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005139 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005140 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005141 xmlChar type = 0;
5142
5143 SKIP_BLANKS;
5144 GROW;
5145 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005146 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005147
Owen Taylor3473f882001-02-23 17:55:21 +00005148 /* Recurse on first child */
5149 NEXT;
5150 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005151 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005152 SKIP_BLANKS;
5153 GROW;
5154 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005155 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005157 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005158 return(NULL);
5159 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005160 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005161 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005162 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005163 return(NULL);
5164 }
Owen Taylor3473f882001-02-23 17:55:21 +00005165 GROW;
5166 if (RAW == '?') {
5167 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5168 NEXT;
5169 } else if (RAW == '*') {
5170 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5171 NEXT;
5172 } else if (RAW == '+') {
5173 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5174 NEXT;
5175 } else {
5176 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178 GROW;
5179 }
5180 SKIP_BLANKS;
5181 SHRINK;
5182 while (RAW != ')') {
5183 /*
5184 * Each loop we parse one separator and one element.
5185 */
5186 if (RAW == ',') {
5187 if (type == 0) type = CUR;
5188
5189 /*
5190 * Detect "Name | Name , Name" error
5191 */
5192 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005193 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005194 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005195 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005196 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005197 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005198 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005199 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 return(NULL);
5201 }
5202 NEXT;
5203
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005204 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005205 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005206 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005207 xmlFreeDocElementContent(ctxt->myDoc, last);
5208 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 return(NULL);
5210 }
5211 if (last == NULL) {
5212 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005213 if (ret != NULL)
5214 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005215 ret = cur = op;
5216 } else {
5217 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005218 if (op != NULL)
5219 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005220 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005221 if (last != NULL)
5222 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005223 cur =op;
5224 last = NULL;
5225 }
5226 } else if (RAW == '|') {
5227 if (type == 0) type = CUR;
5228
5229 /*
5230 * Detect "Name , Name | Name" error
5231 */
5232 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005233 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005234 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005235 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005236 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005237 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005238 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005239 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 return(NULL);
5241 }
5242 NEXT;
5243
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005244 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005246 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005247 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005248 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005249 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005250 return(NULL);
5251 }
5252 if (last == NULL) {
5253 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005254 if (ret != NULL)
5255 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005256 ret = cur = op;
5257 } else {
5258 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005259 if (op != NULL)
5260 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005262 if (last != NULL)
5263 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005264 cur =op;
5265 last = NULL;
5266 }
5267 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005268 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005269 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005270 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 return(NULL);
5272 }
5273 GROW;
5274 SKIP_BLANKS;
5275 GROW;
5276 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005277 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005278 /* Recurse on second child */
5279 NEXT;
5280 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005281 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 SKIP_BLANKS;
5283 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005284 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005285 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005286 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005287 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005288 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005289 return(NULL);
5290 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005291 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 if (RAW == '?') {
5293 last->ocur = XML_ELEMENT_CONTENT_OPT;
5294 NEXT;
5295 } else if (RAW == '*') {
5296 last->ocur = XML_ELEMENT_CONTENT_MULT;
5297 NEXT;
5298 } else if (RAW == '+') {
5299 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5300 NEXT;
5301 } else {
5302 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5303 }
5304 }
5305 SKIP_BLANKS;
5306 GROW;
5307 }
5308 if ((cur != NULL) && (last != NULL)) {
5309 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005310 if (last != NULL)
5311 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005312 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005313 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005314 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5315"Element content declaration doesn't start and stop in the same entity\n",
5316 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005317 }
Owen Taylor3473f882001-02-23 17:55:21 +00005318 NEXT;
5319 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005320 if (ret != NULL) {
5321 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5322 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5323 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5324 else
5325 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5326 }
Owen Taylor3473f882001-02-23 17:55:21 +00005327 NEXT;
5328 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005329 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005330 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005331 cur = ret;
5332 /*
5333 * Some normalization:
5334 * (a | b* | c?)* == (a | b | c)*
5335 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005336 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005337 if ((cur->c1 != NULL) &&
5338 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5339 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5340 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5341 if ((cur->c2 != NULL) &&
5342 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5343 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5344 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5345 cur = cur->c2;
5346 }
5347 }
Owen Taylor3473f882001-02-23 17:55:21 +00005348 NEXT;
5349 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005350 if (ret != NULL) {
5351 int found = 0;
5352
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005353 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5354 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5355 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005356 else
5357 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005358 /*
5359 * Some normalization:
5360 * (a | b*)+ == (a | b)*
5361 * (a | b?)+ == (a | b)*
5362 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005363 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005364 if ((cur->c1 != NULL) &&
5365 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5366 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5367 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5368 found = 1;
5369 }
5370 if ((cur->c2 != NULL) &&
5371 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5372 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5373 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5374 found = 1;
5375 }
5376 cur = cur->c2;
5377 }
5378 if (found)
5379 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5380 }
Owen Taylor3473f882001-02-23 17:55:21 +00005381 NEXT;
5382 }
5383 return(ret);
5384}
5385
5386/**
5387 * xmlParseElementContentDecl:
5388 * @ctxt: an XML parser context
5389 * @name: the name of the element being defined.
5390 * @result: the Element Content pointer will be stored here if any
5391 *
5392 * parse the declaration for an Element content either Mixed or Children,
5393 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5394 *
5395 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5396 *
5397 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5398 */
5399
5400int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005401xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005402 xmlElementContentPtr *result) {
5403
5404 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005405 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005406 int res;
5407
5408 *result = NULL;
5409
5410 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005411 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005412 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 return(-1);
5414 }
5415 NEXT;
5416 GROW;
5417 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005418 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 res = XML_ELEMENT_TYPE_MIXED;
5421 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005422 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005423 res = XML_ELEMENT_TYPE_ELEMENT;
5424 }
Owen Taylor3473f882001-02-23 17:55:21 +00005425 SKIP_BLANKS;
5426 *result = tree;
5427 return(res);
5428}
5429
5430/**
5431 * xmlParseElementDecl:
5432 * @ctxt: an XML parser context
5433 *
5434 * parse an Element declaration.
5435 *
5436 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5437 *
5438 * [ VC: Unique Element Type Declaration ]
5439 * No element type may be declared more than once
5440 *
5441 * Returns the type of the element, or -1 in case of error
5442 */
5443int
5444xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005445 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005446 int ret = -1;
5447 xmlElementContentPtr content = NULL;
5448
Daniel Veillard4c778d82005-01-23 17:37:44 +00005449 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005450 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005451 xmlParserInputPtr input = ctxt->input;
5452
5453 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5456 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005457 }
5458 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005459 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5462 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005463 return(-1);
5464 }
5465 while ((RAW == 0) && (ctxt->inputNr > 1))
5466 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005467 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005468 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5469 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005470 }
5471 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005472 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005473 SKIP(5);
5474 /*
5475 * Element must always be empty.
5476 */
5477 ret = XML_ELEMENT_TYPE_EMPTY;
5478 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5479 (NXT(2) == 'Y')) {
5480 SKIP(3);
5481 /*
5482 * Element is a generic container.
5483 */
5484 ret = XML_ELEMENT_TYPE_ANY;
5485 } else if (RAW == '(') {
5486 ret = xmlParseElementContentDecl(ctxt, name, &content);
5487 } else {
5488 /*
5489 * [ WFC: PEs in Internal Subset ] error handling.
5490 */
5491 if ((RAW == '%') && (ctxt->external == 0) &&
5492 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005494 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005495 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005496 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005497 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5498 }
Owen Taylor3473f882001-02-23 17:55:21 +00005499 return(-1);
5500 }
5501
5502 SKIP_BLANKS;
5503 /*
5504 * Pop-up of finished entities.
5505 */
5506 while ((RAW == 0) && (ctxt->inputNr > 1))
5507 xmlPopInput(ctxt);
5508 SKIP_BLANKS;
5509
5510 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005511 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005512 if (content != NULL) {
5513 xmlFreeDocElementContent(ctxt->myDoc, content);
5514 }
Owen Taylor3473f882001-02-23 17:55:21 +00005515 } else {
5516 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005517 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5518 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005519 }
5520
5521 NEXT;
5522 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005523 (ctxt->sax->elementDecl != NULL)) {
5524 if (content != NULL)
5525 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005526 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5527 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005528 if ((content != NULL) && (content->parent == NULL)) {
5529 /*
5530 * this is a trick: if xmlAddElementDecl is called,
5531 * instead of copying the full tree it is plugged directly
5532 * if called from the parser. Avoid duplicating the
5533 * interfaces or change the API/ABI
5534 */
5535 xmlFreeDocElementContent(ctxt->myDoc, content);
5536 }
5537 } else if (content != NULL) {
5538 xmlFreeDocElementContent(ctxt->myDoc, content);
5539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
Owen Taylor3473f882001-02-23 17:55:21 +00005541 }
5542 return(ret);
5543}
5544
5545/**
Owen Taylor3473f882001-02-23 17:55:21 +00005546 * xmlParseConditionalSections
5547 * @ctxt: an XML parser context
5548 *
5549 * [61] conditionalSect ::= includeSect | ignoreSect
5550 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5551 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5552 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5553 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5554 */
5555
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005556static void
Owen Taylor3473f882001-02-23 17:55:21 +00005557xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5558 SKIP(3);
5559 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005560 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005561 SKIP(7);
5562 SKIP_BLANKS;
5563 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005564 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005565 } else {
5566 NEXT;
5567 }
5568 if (xmlParserDebugEntities) {
5569 if ((ctxt->input != NULL) && (ctxt->input->filename))
5570 xmlGenericError(xmlGenericErrorContext,
5571 "%s(%d): ", ctxt->input->filename,
5572 ctxt->input->line);
5573 xmlGenericError(xmlGenericErrorContext,
5574 "Entering INCLUDE Conditional Section\n");
5575 }
5576
5577 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5578 (NXT(2) != '>'))) {
5579 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005580 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005581
5582 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5583 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005584 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005585 NEXT;
5586 } else if (RAW == '%') {
5587 xmlParsePEReference(ctxt);
5588 } else
5589 xmlParseMarkupDecl(ctxt);
5590
5591 /*
5592 * Pop-up of finished entities.
5593 */
5594 while ((RAW == 0) && (ctxt->inputNr > 1))
5595 xmlPopInput(ctxt);
5596
Daniel Veillardfdc91562002-07-01 21:52:03 +00005597 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005598 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005599 break;
5600 }
5601 }
5602 if (xmlParserDebugEntities) {
5603 if ((ctxt->input != NULL) && (ctxt->input->filename))
5604 xmlGenericError(xmlGenericErrorContext,
5605 "%s(%d): ", ctxt->input->filename,
5606 ctxt->input->line);
5607 xmlGenericError(xmlGenericErrorContext,
5608 "Leaving INCLUDE Conditional Section\n");
5609 }
5610
Daniel Veillarda07050d2003-10-19 14:46:32 +00005611 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005612 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005613 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005614 int depth = 0;
5615
5616 SKIP(6);
5617 SKIP_BLANKS;
5618 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005619 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005620 } else {
5621 NEXT;
5622 }
5623 if (xmlParserDebugEntities) {
5624 if ((ctxt->input != NULL) && (ctxt->input->filename))
5625 xmlGenericError(xmlGenericErrorContext,
5626 "%s(%d): ", ctxt->input->filename,
5627 ctxt->input->line);
5628 xmlGenericError(xmlGenericErrorContext,
5629 "Entering IGNORE Conditional Section\n");
5630 }
5631
5632 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005633 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005634 * But disable SAX event generating DTD building in the meantime
5635 */
5636 state = ctxt->disableSAX;
5637 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005638 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005639 ctxt->instate = XML_PARSER_IGNORE;
5640
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005641 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005642 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5643 depth++;
5644 SKIP(3);
5645 continue;
5646 }
5647 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5648 if (--depth >= 0) SKIP(3);
5649 continue;
5650 }
5651 NEXT;
5652 continue;
5653 }
5654
5655 ctxt->disableSAX = state;
5656 ctxt->instate = instate;
5657
5658 if (xmlParserDebugEntities) {
5659 if ((ctxt->input != NULL) && (ctxt->input->filename))
5660 xmlGenericError(xmlGenericErrorContext,
5661 "%s(%d): ", ctxt->input->filename,
5662 ctxt->input->line);
5663 xmlGenericError(xmlGenericErrorContext,
5664 "Leaving IGNORE Conditional Section\n");
5665 }
5666
5667 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005668 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005669 }
5670
5671 if (RAW == 0)
5672 SHRINK;
5673
5674 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005675 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005676 } else {
5677 SKIP(3);
5678 }
5679}
5680
5681/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005682 * xmlParseMarkupDecl:
5683 * @ctxt: an XML parser context
5684 *
5685 * parse Markup declarations
5686 *
5687 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5688 * NotationDecl | PI | Comment
5689 *
5690 * [ VC: Proper Declaration/PE Nesting ]
5691 * Parameter-entity replacement text must be properly nested with
5692 * markup declarations. That is to say, if either the first character
5693 * or the last character of a markup declaration (markupdecl above) is
5694 * contained in the replacement text for a parameter-entity reference,
5695 * both must be contained in the same replacement text.
5696 *
5697 * [ WFC: PEs in Internal Subset ]
5698 * In the internal DTD subset, parameter-entity references can occur
5699 * only where markup declarations can occur, not within markup declarations.
5700 * (This does not apply to references that occur in external parameter
5701 * entities or to the external subset.)
5702 */
5703void
5704xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5705 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005706 if (CUR == '<') {
5707 if (NXT(1) == '!') {
5708 switch (NXT(2)) {
5709 case 'E':
5710 if (NXT(3) == 'L')
5711 xmlParseElementDecl(ctxt);
5712 else if (NXT(3) == 'N')
5713 xmlParseEntityDecl(ctxt);
5714 break;
5715 case 'A':
5716 xmlParseAttributeListDecl(ctxt);
5717 break;
5718 case 'N':
5719 xmlParseNotationDecl(ctxt);
5720 break;
5721 case '-':
5722 xmlParseComment(ctxt);
5723 break;
5724 default:
5725 /* there is an error but it will be detected later */
5726 break;
5727 }
5728 } else if (NXT(1) == '?') {
5729 xmlParsePI(ctxt);
5730 }
5731 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005732 /*
5733 * This is only for internal subset. On external entities,
5734 * the replacement is done before parsing stage
5735 */
5736 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5737 xmlParsePEReference(ctxt);
5738
5739 /*
5740 * Conditional sections are allowed from entities included
5741 * by PE References in the internal subset.
5742 */
5743 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5744 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5745 xmlParseConditionalSections(ctxt);
5746 }
5747 }
5748
5749 ctxt->instate = XML_PARSER_DTD;
5750}
5751
5752/**
5753 * xmlParseTextDecl:
5754 * @ctxt: an XML parser context
5755 *
5756 * parse an XML declaration header for external entities
5757 *
5758 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5759 *
5760 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5761 */
5762
5763void
5764xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5765 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005766 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005767
5768 /*
5769 * We know that '<?xml' is here.
5770 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005771 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005772 SKIP(5);
5773 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005774 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005775 return;
5776 }
5777
William M. Brack76e95df2003-10-18 16:20:14 +00005778 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005779 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5780 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005781 }
5782 SKIP_BLANKS;
5783
5784 /*
5785 * We may have the VersionInfo here.
5786 */
5787 version = xmlParseVersionInfo(ctxt);
5788 if (version == NULL)
5789 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005790 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005791 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005792 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5793 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005794 }
5795 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005796 ctxt->input->version = version;
5797
5798 /*
5799 * We must have the encoding declaration
5800 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005801 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005802 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5803 /*
5804 * The XML REC instructs us to stop parsing right here
5805 */
5806 return;
5807 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005808 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5809 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5810 "Missing encoding in text declaration\n");
5811 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005812
5813 SKIP_BLANKS;
5814 if ((RAW == '?') && (NXT(1) == '>')) {
5815 SKIP(2);
5816 } else if (RAW == '>') {
5817 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005818 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005819 NEXT;
5820 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005821 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005822 MOVETO_ENDTAG(CUR_PTR);
5823 NEXT;
5824 }
5825}
5826
5827/**
Owen Taylor3473f882001-02-23 17:55:21 +00005828 * xmlParseExternalSubset:
5829 * @ctxt: an XML parser context
5830 * @ExternalID: the external identifier
5831 * @SystemID: the system identifier (or URL)
5832 *
5833 * parse Markup declarations from an external subset
5834 *
5835 * [30] extSubset ::= textDecl? extSubsetDecl
5836 *
5837 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5838 */
5839void
5840xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5841 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005842 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005844 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005845 xmlParseTextDecl(ctxt);
5846 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5847 /*
5848 * The XML REC instructs us to stop parsing right here
5849 */
5850 ctxt->instate = XML_PARSER_EOF;
5851 return;
5852 }
5853 }
5854 if (ctxt->myDoc == NULL) {
5855 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5856 }
5857 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5858 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5859
5860 ctxt->instate = XML_PARSER_DTD;
5861 ctxt->external = 1;
5862 while (((RAW == '<') && (NXT(1) == '?')) ||
5863 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005864 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005865 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005866 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005867
5868 GROW;
5869 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5870 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005871 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005872 NEXT;
5873 } else if (RAW == '%') {
5874 xmlParsePEReference(ctxt);
5875 } else
5876 xmlParseMarkupDecl(ctxt);
5877
5878 /*
5879 * Pop-up of finished entities.
5880 */
5881 while ((RAW == 0) && (ctxt->inputNr > 1))
5882 xmlPopInput(ctxt);
5883
Daniel Veillardfdc91562002-07-01 21:52:03 +00005884 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005885 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005886 break;
5887 }
5888 }
5889
5890 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005891 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 }
5893
5894}
5895
5896/**
5897 * xmlParseReference:
5898 * @ctxt: an XML parser context
5899 *
 * Parse and handle an entity reference in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
5904 *
5905 * [67] Reference ::= EntityRef | CharRef
5906 */
5907void
5908xmlParseReference(xmlParserCtxtPtr ctxt) {
5909 xmlEntityPtr ent;
5910 xmlChar *val;
5911 if (RAW != '&') return;
5912
5913 if (NXT(1) == '#') {
5914 int i = 0;
5915 xmlChar out[10];
5916 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005917 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005918
5919 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5920 /*
5921 * So we are using non-UTF-8 buffers
5922 * Check that the char fit on 8bits, if not
5923 * generate a CharRef.
5924 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005925 if (value <= 0xFF) {
5926 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005927 out[1] = 0;
5928 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5929 (!ctxt->disableSAX))
5930 ctxt->sax->characters(ctxt->userData, out, 1);
5931 } else {
5932 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005933 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005934 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005935 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005936 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5937 (!ctxt->disableSAX))
5938 ctxt->sax->reference(ctxt->userData, out);
5939 }
5940 } else {
5941 /*
5942 * Just encode the value in UTF-8
5943 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005944 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005945 out[i] = 0;
5946 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5947 (!ctxt->disableSAX))
5948 ctxt->sax->characters(ctxt->userData, out, i);
5949 }
5950 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005951 int was_checked;
5952
Owen Taylor3473f882001-02-23 17:55:21 +00005953 ent = xmlParseEntityRef(ctxt);
5954 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005955 if (!ctxt->wellFormed)
5956 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005957 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if ((ent->name != NULL) &&
5959 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5960 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005961 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005962
5963
5964 /*
5965 * The first reference to the entity trigger a parsing phase
5966 * where the ent->children is filled with the result from
5967 * the parsing.
5968 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005969 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005970 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00005971
Owen Taylor3473f882001-02-23 17:55:21 +00005972 value = ent->content;
5973
5974 /*
5975 * Check that this entity is well formed
5976 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005977 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005978 (value[1] == 0) && (value[0] == '<') &&
5979 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5980 /*
5981 * DONE: get definite answer on this !!!
5982 * Lots of entity decls are used to declare a single
5983 * char
5984 * <!ENTITY lt "<">
5985 * Which seems to be valid since
5986 * 2.4: The ampersand character (&) and the left angle
5987 * bracket (<) may appear in their literal form only
5988 * when used ... They are also legal within the literal
5989 * entity value of an internal entity declaration;i
5990 * see "4.3.2 Well-Formed Parsed Entities".
5991 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5992 * Looking at the OASIS test suite and James Clark
5993 * tests, this is broken. However the XML REC uses
5994 * it. Is the XML REC not well-formed ????
5995 * This is a hack to avoid this problem
5996 *
5997 * ANSWER: since lt gt amp .. are already defined,
5998 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005999 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006000 * is lousy but acceptable.
6001 */
6002 list = xmlNewDocText(ctxt->myDoc, value);
6003 if (list != NULL) {
6004 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6005 (ent->children == NULL)) {
6006 ent->children = list;
6007 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006008 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006009 list->parent = (xmlNodePtr) ent;
6010 } else {
6011 xmlFreeNodeList(list);
6012 }
6013 } else if (list != NULL) {
6014 xmlFreeNodeList(list);
6015 }
6016 } else {
6017 /*
6018 * 4.3.2: An internal general parsed entity is well-formed
6019 * if its replacement text matches the production labeled
6020 * content.
6021 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006022
6023 void *user_data;
6024 /*
6025 * This is a bit hackish but this seems the best
6026 * way to make sure both SAX and DOM entity support
6027 * behaves okay.
6028 */
6029 if (ctxt->userData == ctxt)
6030 user_data = NULL;
6031 else
6032 user_data = ctxt->userData;
6033
Owen Taylor3473f882001-02-23 17:55:21 +00006034 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6035 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006036 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6037 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006038 ctxt->depth--;
6039 } else if (ent->etype ==
6040 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6041 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006042 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006043 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006044 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006045 ctxt->depth--;
6046 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006047 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006048 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6049 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006050 }
6051 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006052 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006053 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006054 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006055 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6056 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006057 (ent->children == NULL)) {
6058 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006059 if (ctxt->replaceEntities) {
6060 /*
6061 * Prune it directly in the generated document
6062 * except for single text nodes.
6063 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006064 if (((list->type == XML_TEXT_NODE) &&
6065 (list->next == NULL)) ||
6066 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006067 list->parent = (xmlNodePtr) ent;
6068 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006069 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006070 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006071 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006072 while (list != NULL) {
6073 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006074 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006075 if (list->next == NULL)
6076 ent->last = list;
6077 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006078 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006079 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006080#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006081 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6082 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006083#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006084 }
6085 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006086 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006087 while (list != NULL) {
6088 list->parent = (xmlNodePtr) ent;
6089 if (list->next == NULL)
6090 ent->last = list;
6091 list = list->next;
6092 }
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
6094 } else {
6095 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006096 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006097 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006098 } else if ((ret != XML_ERR_OK) &&
6099 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006100 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6101 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006102 } else if (list != NULL) {
6103 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006104 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006105 }
6106 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006107 ent->checked = 1;
6108 }
6109
6110 if (ent->children == NULL) {
6111 /*
6112 * Probably running in SAX mode and the callbacks don't
6113 * build the entity content. So unless we already went
6114 * though parsing for first checking go though the entity
6115 * content to generate callbacks associated to the entity
6116 */
6117 if (was_checked == 1) {
6118 void *user_data;
6119 /*
6120 * This is a bit hackish but this seems the best
6121 * way to make sure both SAX and DOM entity support
6122 * behaves okay.
6123 */
6124 if (ctxt->userData == ctxt)
6125 user_data = NULL;
6126 else
6127 user_data = ctxt->userData;
6128
6129 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6130 ctxt->depth++;
6131 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6132 ent->content, user_data, NULL);
6133 ctxt->depth--;
6134 } else if (ent->etype ==
6135 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6136 ctxt->depth++;
6137 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6138 ctxt->sax, user_data, ctxt->depth,
6139 ent->URI, ent->ExternalID, NULL);
6140 ctxt->depth--;
6141 } else {
6142 ret = XML_ERR_ENTITY_PE_INTERNAL;
6143 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6144 "invalid entity type found\n", NULL);
6145 }
6146 if (ret == XML_ERR_ENTITY_LOOP) {
6147 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6148 return;
6149 }
6150 }
6151 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6152 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6153 /*
6154 * Entity reference callback comes second, it's somewhat
6155 * superfluous but a compatibility to historical behaviour
6156 */
6157 ctxt->sax->reference(ctxt->userData, ent->name);
6158 }
6159 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006160 }
6161 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006162 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006163 /*
6164 * Create a node.
6165 */
6166 ctxt->sax->reference(ctxt->userData, ent->name);
6167 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006168 }
6169 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006170 /*
6171 * There is a problem on the handling of _private for entities
6172 * (bug 155816): Should we copy the content of the field from
6173 * the entity (possibly overwriting some value set by the user
6174 * when a copy is created), should we leave it alone, or should
6175 * we try to take care of different situations? The problem
6176 * is exacerbated by the usage of this field by the xmlReader.
6177 * To fix this bug, we look at _private on the created node
6178 * and, if it's NULL, we copy in whatever was in the entity.
6179 * If it's not NULL we leave it alone. This is somewhat of a
6180 * hack - maybe we should have further tests to determine
6181 * what to do.
6182 */
Owen Taylor3473f882001-02-23 17:55:21 +00006183 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6184 /*
6185 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006186 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006187 * In the first occurrence list contains the replacement.
6188 * progressive == 2 means we are operating on the Reader
6189 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006190 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006191 if (((list == NULL) && (ent->owner == 0)) ||
6192 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006193 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006194
6195 /*
6196 * when operating on a reader, the entities definitions
6197 * are always owning the entities subtree.
6198 if (ctxt->parseMode == XML_PARSE_READER)
6199 ent->owner = 1;
6200 */
6201
Daniel Veillard62f313b2001-07-04 19:49:14 +00006202 cur = ent->children;
6203 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006204 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006205 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006206 if (nw->_private == NULL)
6207 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006208 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006209 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006210 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006211 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006212 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006213 if (cur == ent->last) {
6214 /*
6215 * needed to detect some strange empty
6216 * node cases in the reader tests
6217 */
6218 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006219 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006220 (nw->type == XML_ELEMENT_NODE) &&
6221 (nw->children == NULL))
6222 nw->extra = 1;
6223
Daniel Veillard62f313b2001-07-04 19:49:14 +00006224 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006225 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006226 cur = cur->next;
6227 }
Daniel Veillard81273902003-09-30 00:43:48 +00006228#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006229 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006230 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006231#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006232 } else if (list == NULL) {
6233 xmlNodePtr nw = NULL, cur, next, last,
6234 firstChild = NULL;
6235 /*
6236 * Copy the entity child list and make it the new
6237 * entity child list. The goal is to make sure any
6238 * ID or REF referenced will be the one from the
6239 * document content and not the entity copy.
6240 */
6241 cur = ent->children;
6242 ent->children = NULL;
6243 last = ent->last;
6244 ent->last = NULL;
6245 while (cur != NULL) {
6246 next = cur->next;
6247 cur->next = NULL;
6248 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006249 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006250 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006251 if (nw->_private == NULL)
6252 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006253 if (firstChild == NULL){
6254 firstChild = cur;
6255 }
6256 xmlAddChild((xmlNodePtr) ent, nw);
6257 xmlAddChild(ctxt->node, cur);
6258 }
6259 if (cur == last)
6260 break;
6261 cur = next;
6262 }
6263 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006264#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006265 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6266 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006267#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006268 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006269 const xmlChar *nbktext;
6270
Daniel Veillard62f313b2001-07-04 19:49:14 +00006271 /*
6272 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006273 * node with a possible previous text one which
6274 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006275 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006276 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6277 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006278 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006279 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006280 if ((ent->last != ent->children) &&
6281 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006282 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006283 xmlAddChildList(ctxt->node, ent->children);
6284 }
6285
Owen Taylor3473f882001-02-23 17:55:21 +00006286 /*
6287 * This is to avoid a nasty side effect, see
6288 * characters() in SAX.c
6289 */
6290 ctxt->nodemem = 0;
6291 ctxt->nodelen = 0;
6292 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006293 }
6294 }
6295 } else {
6296 val = ent->content;
6297 if (val == NULL) return;
6298 /*
6299 * inline the entity.
6300 */
6301 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6302 (!ctxt->disableSAX))
6303 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6304 }
6305 }
6306}
6307
6308/**
6309 * xmlParseEntityRef:
6310 * @ctxt: an XML parser context
6311 *
6312 * parse ENTITY references declarations
6313 *
6314 * [68] EntityRef ::= '&' Name ';'
6315 *
6316 * [ WFC: Entity Declared ]
6317 * In a document without any DTD, a document with only an internal DTD
6318 * subset which contains no parameter entity references, or a document
6319 * with "standalone='yes'", the Name given in the entity reference
6320 * must match that in an entity declaration, except that well-formed
6321 * documents need not declare any of the following entities: amp, lt,
6322 * gt, apos, quot. The declaration of a parameter entity must precede
6323 * any reference to it. Similarly, the declaration of a general entity
6324 * must precede any reference to it which appears in a default value in an
6325 * attribute-list declaration. Note that if entities are declared in the
6326 * external subset or in external parameter entities, a non-validating
6327 * processor is not obligated to read and process their declarations;
6328 * for such documents, the rule that an entity must be declared is a
6329 * well-formedness constraint only if standalone='yes'.
6330 *
6331 * [ WFC: Parsed Entity ]
6332 * An entity reference must not contain the name of an unparsed entity
6333 *
6334 * Returns the xmlEntityPtr if found, or NULL otherwise.
6335 */
6336xmlEntityPtr
6337xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006338 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006339 xmlEntityPtr ent = NULL;
6340
6341 GROW;
6342
6343 if (RAW == '&') {
6344 NEXT;
6345 name = xmlParseName(ctxt);
6346 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006347 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6348 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006349 } else {
6350 if (RAW == ';') {
6351 NEXT;
6352 /*
6353 * Ask first SAX for entity resolution, otherwise try the
6354 * predefined set.
6355 */
6356 if (ctxt->sax != NULL) {
6357 if (ctxt->sax->getEntity != NULL)
6358 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006359 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006360 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006361 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6362 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006363 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006364 }
Owen Taylor3473f882001-02-23 17:55:21 +00006365 }
6366 /*
6367 * [ WFC: Entity Declared ]
6368 * In a document without any DTD, a document with only an
6369 * internal DTD subset which contains no parameter entity
6370 * references, or a document with "standalone='yes'", the
6371 * Name given in the entity reference must match that in an
6372 * entity declaration, except that well-formed documents
6373 * need not declare any of the following entities: amp, lt,
6374 * gt, apos, quot.
6375 * The declaration of a parameter entity must precede any
6376 * reference to it.
6377 * Similarly, the declaration of a general entity must
6378 * precede any reference to it which appears in a default
6379 * value in an attribute-list declaration. Note that if
6380 * entities are declared in the external subset or in
6381 * external parameter entities, a non-validating processor
6382 * is not obligated to read and process their declarations;
6383 * for such documents, the rule that an entity must be
6384 * declared is a well-formedness constraint only if
6385 * standalone='yes'.
6386 */
6387 if (ent == NULL) {
6388 if ((ctxt->standalone == 1) ||
6389 ((ctxt->hasExternalSubset == 0) &&
6390 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006391 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006392 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006393 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006394 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006395 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006396 if ((ctxt->inSubset == 0) &&
6397 (ctxt->sax != NULL) &&
6398 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00006399 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006400 }
Owen Taylor3473f882001-02-23 17:55:21 +00006401 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006402 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006403 }
6404
6405 /*
6406 * [ WFC: Parsed Entity ]
6407 * An entity reference must not contain the name of an
6408 * unparsed entity
6409 */
6410 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006411 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006412 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006413 }
6414
6415 /*
6416 * [ WFC: No External Entity References ]
6417 * Attribute values cannot contain direct or indirect
6418 * entity references to external entities.
6419 */
6420 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6421 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006422 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6423 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006424 }
6425 /*
6426 * [ WFC: No < in Attribute Values ]
6427 * The replacement text of any entity referred to directly or
6428 * indirectly in an attribute value (other than "&lt;") must
6429 * not contain a <.
6430 */
6431 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6432 (ent != NULL) &&
6433 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6434 (ent->content != NULL) &&
6435 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006436 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006437 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006438 }
6439
6440 /*
6441 * Internal check, no parameter entities here ...
6442 */
6443 else {
6444 switch (ent->etype) {
6445 case XML_INTERNAL_PARAMETER_ENTITY:
6446 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006447 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6448 "Attempt to reference the parameter entity '%s'\n",
6449 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006450 break;
6451 default:
6452 break;
6453 }
6454 }
6455
6456 /*
6457 * [ WFC: No Recursion ]
6458 * A parsed entity must not contain a recursive reference
6459 * to itself, either directly or indirectly.
6460 * Done somewhere else
6461 */
6462
6463 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006464 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006465 }
Owen Taylor3473f882001-02-23 17:55:21 +00006466 }
6467 }
6468 return(ent);
6469}
6470
6471/**
6472 * xmlParseStringEntityRef:
6473 * @ctxt: an XML parser context
6474 * @str: a pointer to an index in the string
6475 *
6476 * parse ENTITY references declarations, but this version parses it from
6477 * a string value.
6478 *
6479 * [68] EntityRef ::= '&' Name ';'
6480 *
6481 * [ WFC: Entity Declared ]
6482 * In a document without any DTD, a document with only an internal DTD
6483 * subset which contains no parameter entity references, or a document
6484 * with "standalone='yes'", the Name given in the entity reference
6485 * must match that in an entity declaration, except that well-formed
6486 * documents need not declare any of the following entities: amp, lt,
6487 * gt, apos, quot. The declaration of a parameter entity must precede
6488 * any reference to it. Similarly, the declaration of a general entity
6489 * must precede any reference to it which appears in a default value in an
6490 * attribute-list declaration. Note that if entities are declared in the
6491 * external subset or in external parameter entities, a non-validating
6492 * processor is not obligated to read and process their declarations;
6493 * for such documents, the rule that an entity must be declared is a
6494 * well-formedness constraint only if standalone='yes'.
6495 *
6496 * [ WFC: Parsed Entity ]
6497 * An entity reference must not contain the name of an unparsed entity
6498 *
6499 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6500 * is updated to the current location in the string.
6501 */
6502xmlEntityPtr
6503xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6504 xmlChar *name;
6505 const xmlChar *ptr;
6506 xmlChar cur;
6507 xmlEntityPtr ent = NULL;
6508
6509 if ((str == NULL) || (*str == NULL))
6510 return(NULL);
6511 ptr = *str;
6512 cur = *ptr;
6513 if (cur == '&') {
6514 ptr++;
6515 cur = *ptr;
6516 name = xmlParseStringName(ctxt, &ptr);
6517 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006518 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6519 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006520 } else {
6521 if (*ptr == ';') {
6522 ptr++;
6523 /*
6524 * Ask first SAX for entity resolution, otherwise try the
6525 * predefined set.
6526 */
6527 if (ctxt->sax != NULL) {
6528 if (ctxt->sax->getEntity != NULL)
6529 ent = ctxt->sax->getEntity(ctxt->userData, name);
6530 if (ent == NULL)
6531 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006532 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006533 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006534 }
Owen Taylor3473f882001-02-23 17:55:21 +00006535 }
6536 /*
6537 * [ WFC: Entity Declared ]
6538 * In a document without any DTD, a document with only an
6539 * internal DTD subset which contains no parameter entity
6540 * references, or a document with "standalone='yes'", the
6541 * Name given in the entity reference must match that in an
6542 * entity declaration, except that well-formed documents
6543 * need not declare any of the following entities: amp, lt,
6544 * gt, apos, quot.
6545 * The declaration of a parameter entity must precede any
6546 * reference to it.
6547 * Similarly, the declaration of a general entity must
6548 * precede any reference to it which appears in a default
6549 * value in an attribute-list declaration. Note that if
6550 * entities are declared in the external subset or in
6551 * external parameter entities, a non-validating processor
6552 * is not obligated to read and process their declarations;
6553 * for such documents, the rule that an entity must be
6554 * declared is a well-formedness constraint only if
6555 * standalone='yes'.
6556 */
6557 if (ent == NULL) {
6558 if ((ctxt->standalone == 1) ||
6559 ((ctxt->hasExternalSubset == 0) &&
6560 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006561 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006562 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006563 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006564 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006565 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006566 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006567 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006568 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006569 }
6570
6571 /*
6572 * [ WFC: Parsed Entity ]
6573 * An entity reference must not contain the name of an
6574 * unparsed entity
6575 */
6576 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006578 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 }
6580
6581 /*
6582 * [ WFC: No External Entity References ]
6583 * Attribute values cannot contain direct or indirect
6584 * entity references to external entities.
6585 */
6586 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6587 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006589 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006590 }
6591 /*
6592 * [ WFC: No < in Attribute Values ]
6593 * The replacement text of any entity referred to directly or
6594 * indirectly in an attribute value (other than "&lt;") must
6595 * not contain a <.
6596 */
6597 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6598 (ent != NULL) &&
6599 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6600 (ent->content != NULL) &&
6601 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006602 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6603 "'<' in entity '%s' is not allowed in attributes values\n",
6604 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006605 }
6606
6607 /*
6608 * Internal check, no parameter entities here ...
6609 */
6610 else {
6611 switch (ent->etype) {
6612 case XML_INTERNAL_PARAMETER_ENTITY:
6613 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006614 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6615 "Attempt to reference the parameter entity '%s'\n",
6616 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006617 break;
6618 default:
6619 break;
6620 }
6621 }
6622
6623 /*
6624 * [ WFC: No Recursion ]
6625 * A parsed entity must not contain a recursive reference
6626 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006627 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006628 */
6629
6630 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006631 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006632 }
6633 xmlFree(name);
6634 }
6635 }
6636 *str = ptr;
6637 return(ent);
6638}
6639
6640/**
6641 * xmlParsePEReference:
6642 * @ctxt: an XML parser context
6643 *
6644 * parse PEReference declarations
6645 * The entity content is handled directly by pushing it's content as
6646 * a new input stream.
6647 *
6648 * [69] PEReference ::= '%' Name ';'
6649 *
6650 * [ WFC: No Recursion ]
6651 * A parsed entity must not contain a recursive
6652 * reference to itself, either directly or indirectly.
6653 *
6654 * [ WFC: Entity Declared ]
6655 * In a document without any DTD, a document with only an internal DTD
6656 * subset which contains no parameter entity references, or a document
6657 * with "standalone='yes'", ... ... The declaration of a parameter
6658 * entity must precede any reference to it...
6659 *
6660 * [ VC: Entity Declared ]
6661 * In a document with an external subset or external parameter entities
6662 * with "standalone='no'", ... ... The declaration of a parameter entity
6663 * must precede any reference to it...
6664 *
6665 * [ WFC: In DTD ]
6666 * Parameter-entity references may only appear in the DTD.
6667 * NOTE: misleading but this is handled.
6668 */
6669void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006670xmlParsePEReference(xmlParserCtxtPtr ctxt)
6671{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006672 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006673 xmlEntityPtr entity = NULL;
6674 xmlParserInputPtr input;
6675
6676 if (RAW == '%') {
6677 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006678 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006679 if (name == NULL) {
6680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6681 "xmlParsePEReference: no name\n");
6682 } else {
6683 if (RAW == ';') {
6684 NEXT;
6685 if ((ctxt->sax != NULL) &&
6686 (ctxt->sax->getParameterEntity != NULL))
6687 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6688 name);
6689 if (entity == NULL) {
6690 /*
6691 * [ WFC: Entity Declared ]
6692 * In a document without any DTD, a document with only an
6693 * internal DTD subset which contains no parameter entity
6694 * references, or a document with "standalone='yes'", ...
6695 * ... The declaration of a parameter entity must precede
6696 * any reference to it...
6697 */
6698 if ((ctxt->standalone == 1) ||
6699 ((ctxt->hasExternalSubset == 0) &&
6700 (ctxt->hasPErefs == 0))) {
6701 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6702 "PEReference: %%%s; not found\n",
6703 name);
6704 } else {
6705 /*
6706 * [ VC: Entity Declared ]
6707 * In a document with an external subset or external
6708 * parameter entities with "standalone='no'", ...
6709 * ... The declaration of a parameter entity must
6710 * precede any reference to it...
6711 */
6712 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6713 "PEReference: %%%s; not found\n",
6714 name, NULL);
6715 ctxt->valid = 0;
6716 }
6717 } else {
6718 /*
6719 * Internal checking in case the entity quest barfed
6720 */
6721 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6722 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6723 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6724 "Internal: %%%s; is not a parameter entity\n",
6725 name, NULL);
6726 } else if (ctxt->input->free != deallocblankswrapper) {
6727 input =
6728 xmlNewBlanksWrapperInputStream(ctxt, entity);
6729 xmlPushInput(ctxt, input);
6730 } else {
6731 /*
6732 * TODO !!!
6733 * handle the extra spaces added before and after
6734 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6735 */
6736 input = xmlNewEntityInputStream(ctxt, entity);
6737 xmlPushInput(ctxt, input);
6738 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006739 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006740 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006741 xmlParseTextDecl(ctxt);
6742 if (ctxt->errNo ==
6743 XML_ERR_UNSUPPORTED_ENCODING) {
6744 /*
6745 * The XML REC instructs us to stop parsing
6746 * right here
6747 */
6748 ctxt->instate = XML_PARSER_EOF;
6749 return;
6750 }
6751 }
6752 }
6753 }
6754 ctxt->hasPErefs = 1;
6755 } else {
6756 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6757 }
6758 }
Owen Taylor3473f882001-02-23 17:55:21 +00006759 }
6760}
6761
6762/**
6763 * xmlParseStringPEReference:
6764 * @ctxt: an XML parser context
6765 * @str: a pointer to an index in the string
6766 *
6767 * parse PEReference declarations
6768 *
6769 * [69] PEReference ::= '%' Name ';'
6770 *
6771 * [ WFC: No Recursion ]
6772 * A parsed entity must not contain a recursive
6773 * reference to itself, either directly or indirectly.
6774 *
6775 * [ WFC: Entity Declared ]
6776 * In a document without any DTD, a document with only an internal DTD
6777 * subset which contains no parameter entity references, or a document
6778 * with "standalone='yes'", ... ... The declaration of a parameter
6779 * entity must precede any reference to it...
6780 *
6781 * [ VC: Entity Declared ]
6782 * In a document with an external subset or external parameter entities
6783 * with "standalone='no'", ... ... The declaration of a parameter entity
6784 * must precede any reference to it...
6785 *
6786 * [ WFC: In DTD ]
6787 * Parameter-entity references may only appear in the DTD.
6788 * NOTE: misleading but this is handled.
6789 *
6790 * Returns the string of the entity content.
6791 * str is updated to the current value of the index
6792 */
6793xmlEntityPtr
6794xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6795 const xmlChar *ptr;
6796 xmlChar cur;
6797 xmlChar *name;
6798 xmlEntityPtr entity = NULL;
6799
6800 if ((str == NULL) || (*str == NULL)) return(NULL);
6801 ptr = *str;
6802 cur = *ptr;
6803 if (cur == '%') {
6804 ptr++;
6805 cur = *ptr;
6806 name = xmlParseStringName(ctxt, &ptr);
6807 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006808 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6809 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006810 } else {
6811 cur = *ptr;
6812 if (cur == ';') {
6813 ptr++;
6814 cur = *ptr;
6815 if ((ctxt->sax != NULL) &&
6816 (ctxt->sax->getParameterEntity != NULL))
6817 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6818 name);
6819 if (entity == NULL) {
6820 /*
6821 * [ WFC: Entity Declared ]
6822 * In a document without any DTD, a document with only an
6823 * internal DTD subset which contains no parameter entity
6824 * references, or a document with "standalone='yes'", ...
6825 * ... The declaration of a parameter entity must precede
6826 * any reference to it...
6827 */
6828 if ((ctxt->standalone == 1) ||
6829 ((ctxt->hasExternalSubset == 0) &&
6830 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006831 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006832 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006833 } else {
6834 /*
6835 * [ VC: Entity Declared ]
6836 * In a document with an external subset or external
6837 * parameter entities with "standalone='no'", ...
6838 * ... The declaration of a parameter entity must
6839 * precede any reference to it...
6840 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006841 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6842 "PEReference: %%%s; not found\n",
6843 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006844 ctxt->valid = 0;
6845 }
6846 } else {
6847 /*
6848 * Internal checking in case the entity quest barfed
6849 */
6850 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6851 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006852 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6853 "%%%s; is not a parameter entity\n",
6854 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006855 }
6856 }
6857 ctxt->hasPErefs = 1;
6858 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006859 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006860 }
6861 xmlFree(name);
6862 }
6863 }
6864 *str = ptr;
6865 return(entity);
6866}
6867
6868/**
6869 * xmlParseDocTypeDecl:
6870 * @ctxt: an XML parser context
6871 *
6872 * parse a DOCTYPE declaration
6873 *
6874 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6875 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6876 *
6877 * [ VC: Root Element Type ]
6878 * The Name in the document type declaration must match the element
6879 * type of the root element.
6880 */
6881
6882void
6883xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006884 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006885 xmlChar *ExternalID = NULL;
6886 xmlChar *URI = NULL;
6887
6888 /*
6889 * We know that '<!DOCTYPE' has been detected.
6890 */
6891 SKIP(9);
6892
6893 SKIP_BLANKS;
6894
6895 /*
6896 * Parse the DOCTYPE name.
6897 */
6898 name = xmlParseName(ctxt);
6899 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006900 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6901 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006902 }
6903 ctxt->intSubName = name;
6904
6905 SKIP_BLANKS;
6906
6907 /*
6908 * Check for SystemID and ExternalID
6909 */
6910 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6911
6912 if ((URI != NULL) || (ExternalID != NULL)) {
6913 ctxt->hasExternalSubset = 1;
6914 }
6915 ctxt->extSubURI = URI;
6916 ctxt->extSubSystem = ExternalID;
6917
6918 SKIP_BLANKS;
6919
6920 /*
6921 * Create and update the internal subset.
6922 */
6923 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6924 (!ctxt->disableSAX))
6925 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6926
6927 /*
6928 * Is there any internal subset declarations ?
6929 * they are handled separately in xmlParseInternalSubset()
6930 */
6931 if (RAW == '[')
6932 return;
6933
6934 /*
6935 * We should be at the end of the DOCTYPE declaration.
6936 */
6937 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006938 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006939 }
6940 NEXT;
6941}
6942
6943/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006944 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006945 * @ctxt: an XML parser context
6946 *
6947 * parse the internal subset declaration
6948 *
6949 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6950 */
6951
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006952static void
Owen Taylor3473f882001-02-23 17:55:21 +00006953xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6954 /*
6955 * Is there any DTD definition ?
6956 */
6957 if (RAW == '[') {
6958 ctxt->instate = XML_PARSER_DTD;
6959 NEXT;
6960 /*
6961 * Parse the succession of Markup declarations and
6962 * PEReferences.
6963 * Subsequence (markupdecl | PEReference | S)*
6964 */
6965 while (RAW != ']') {
6966 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006967 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006968
6969 SKIP_BLANKS;
6970 xmlParseMarkupDecl(ctxt);
6971 xmlParsePEReference(ctxt);
6972
6973 /*
6974 * Pop-up of finished entities.
6975 */
6976 while ((RAW == 0) && (ctxt->inputNr > 1))
6977 xmlPopInput(ctxt);
6978
6979 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006980 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006981 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006982 break;
6983 }
6984 }
6985 if (RAW == ']') {
6986 NEXT;
6987 SKIP_BLANKS;
6988 }
6989 }
6990
6991 /*
6992 * We should be at the end of the DOCTYPE declaration.
6993 */
6994 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006995 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006996 }
6997 NEXT;
6998}
6999
Daniel Veillard81273902003-09-30 00:43:48 +00007000#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007001/**
7002 * xmlParseAttribute:
7003 * @ctxt: an XML parser context
7004 * @value: a xmlChar ** used to store the value of the attribute
7005 *
7006 * parse an attribute
7007 *
7008 * [41] Attribute ::= Name Eq AttValue
7009 *
7010 * [ WFC: No External Entity References ]
7011 * Attribute values cannot contain direct or indirect entity references
7012 * to external entities.
7013 *
7014 * [ WFC: No < in Attribute Values ]
7015 * The replacement text of any entity referred to directly or indirectly in
7016 * an attribute value (other than "&lt;") must not contain a <.
7017 *
7018 * [ VC: Attribute Value Type ]
7019 * The attribute must have been declared; the value must be of the type
7020 * declared for it.
7021 *
7022 * [25] Eq ::= S? '=' S?
7023 *
7024 * With namespace:
7025 *
7026 * [NS 11] Attribute ::= QName Eq AttValue
7027 *
7028 * Also the case QName == xmlns:??? is handled independently as a namespace
7029 * definition.
7030 *
7031 * Returns the attribute name, and the value in *value.
7032 */
7033
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007034const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007035xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007036 const xmlChar *name;
7037 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007038
7039 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007040 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007041 name = xmlParseName(ctxt);
7042 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007043 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007044 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007045 return(NULL);
7046 }
7047
7048 /*
7049 * read the value
7050 */
7051 SKIP_BLANKS;
7052 if (RAW == '=') {
7053 NEXT;
7054 SKIP_BLANKS;
7055 val = xmlParseAttValue(ctxt);
7056 ctxt->instate = XML_PARSER_CONTENT;
7057 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007058 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007059 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007060 return(NULL);
7061 }
7062
7063 /*
7064 * Check that xml:lang conforms to the specification
7065 * No more registered as an error, just generate a warning now
7066 * since this was deprecated in XML second edition
7067 */
7068 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7069 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007070 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7071 "Malformed value for xml:lang : %s\n",
7072 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007073 }
7074 }
7075
7076 /*
7077 * Check that xml:space conforms to the specification
7078 */
7079 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7080 if (xmlStrEqual(val, BAD_CAST "default"))
7081 *(ctxt->space) = 0;
7082 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7083 *(ctxt->space) = 1;
7084 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007085 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007086"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007087 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007088 }
7089 }
7090
7091 *value = val;
7092 return(name);
7093}
7094
7095/**
7096 * xmlParseStartTag:
7097 * @ctxt: an XML parser context
7098 *
7099 * parse a start of tag either for rule element or
7100 * EmptyElement. In both case we don't parse the tag closing chars.
7101 *
7102 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7103 *
7104 * [ WFC: Unique Att Spec ]
7105 * No attribute name may appear more than once in the same start-tag or
7106 * empty-element tag.
7107 *
7108 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7109 *
7110 * [ WFC: Unique Att Spec ]
7111 * No attribute name may appear more than once in the same start-tag or
7112 * empty-element tag.
7113 *
7114 * With namespace:
7115 *
7116 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7117 *
7118 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7119 *
7120 * Returns the element name parsed
7121 */
7122
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007123const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007124xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007125 const xmlChar *name;
7126 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007127 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007128 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007129 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007130 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007131 int i;
7132
7133 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007134 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007135
7136 name = xmlParseName(ctxt);
7137 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007139 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007140 return(NULL);
7141 }
7142
7143 /*
7144 * Now parse the attributes, it ends up with the ending
7145 *
7146 * (S Attribute)* S?
7147 */
7148 SKIP_BLANKS;
7149 GROW;
7150
Daniel Veillard21a0f912001-02-25 19:54:14 +00007151 while ((RAW != '>') &&
7152 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007153 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007154 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007155 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007156
7157 attname = xmlParseAttribute(ctxt, &attvalue);
7158 if ((attname != NULL) && (attvalue != NULL)) {
7159 /*
7160 * [ WFC: Unique Att Spec ]
7161 * No attribute name may appear more than once in the same
7162 * start-tag or empty-element tag.
7163 */
7164 for (i = 0; i < nbatts;i += 2) {
7165 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007166 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007167 xmlFree(attvalue);
7168 goto failed;
7169 }
7170 }
Owen Taylor3473f882001-02-23 17:55:21 +00007171 /*
7172 * Add the pair to atts
7173 */
7174 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007175 maxatts = 22; /* allow for 10 attrs by default */
7176 atts = (const xmlChar **)
7177 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007178 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007179 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007180 if (attvalue != NULL)
7181 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007182 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007183 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007184 ctxt->atts = atts;
7185 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007186 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007187 const xmlChar **n;
7188
Owen Taylor3473f882001-02-23 17:55:21 +00007189 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007190 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007191 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007192 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007193 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007194 if (attvalue != NULL)
7195 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007196 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007197 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007198 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007199 ctxt->atts = atts;
7200 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007201 }
7202 atts[nbatts++] = attname;
7203 atts[nbatts++] = attvalue;
7204 atts[nbatts] = NULL;
7205 atts[nbatts + 1] = NULL;
7206 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007207 if (attvalue != NULL)
7208 xmlFree(attvalue);
7209 }
7210
7211failed:
7212
Daniel Veillard3772de32002-12-17 10:31:45 +00007213 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007214 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7215 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007216 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007217 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7218 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007219 }
7220 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007221 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7222 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007223 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7224 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007225 break;
7226 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007227 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007228 GROW;
7229 }
7230
7231 /*
7232 * SAX: Start of Element !
7233 */
7234 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007235 (!ctxt->disableSAX)) {
7236 if (nbatts > 0)
7237 ctxt->sax->startElement(ctxt->userData, name, atts);
7238 else
7239 ctxt->sax->startElement(ctxt->userData, name, NULL);
7240 }
Owen Taylor3473f882001-02-23 17:55:21 +00007241
7242 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007243 /* Free only the content strings */
7244 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007245 if (atts[i] != NULL)
7246 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007247 }
7248 return(name);
7249}
7250
7251/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007252 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007253 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007254 * @line: line of the start tag
7255 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007256 *
7257 * parse an end of tag
7258 *
7259 * [42] ETag ::= '</' Name S? '>'
7260 *
7261 * With namespace
7262 *
7263 * [NS 9] ETag ::= '</' QName S? '>'
7264 */
7265
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007266static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007267xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007268 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007269
7270 GROW;
7271 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007272 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007273 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007274 return;
7275 }
7276 SKIP(2);
7277
Daniel Veillard46de64e2002-05-29 08:21:33 +00007278 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007279
7280 /*
7281 * We should definitely be at the ending "S? '>'" part
7282 */
7283 GROW;
7284 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007285 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007286 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007287 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007288 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007289
7290 /*
7291 * [ WFC: Element Type Match ]
7292 * The Name in an element's end-tag must match the element type in the
7293 * start-tag.
7294 *
7295 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007296 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007297 if (name == NULL) name = BAD_CAST "unparseable";
7298 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007299 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007300 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007301 }
7302
7303 /*
7304 * SAX: End of Tag
7305 */
7306 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7307 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007308 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007309
Daniel Veillarde57ec792003-09-10 10:50:59 +00007310 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007311 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007312 return;
7313}
7314
7315/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007316 * xmlParseEndTag:
7317 * @ctxt: an XML parser context
7318 *
7319 * parse an end of tag
7320 *
7321 * [42] ETag ::= '</' Name S? '>'
7322 *
7323 * With namespace
7324 *
7325 * [NS 9] ETag ::= '</' QName S? '>'
7326 */
7327
7328void
7329xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007330 xmlParseEndTag1(ctxt, 0);
7331}
Daniel Veillard81273902003-09-30 00:43:48 +00007332#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007333
7334/************************************************************************
7335 * *
7336 * SAX 2 specific operations *
7337 * *
7338 ************************************************************************/
7339
7340static const xmlChar *
7341xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7342 int len = 0, l;
7343 int c;
7344 int count = 0;
7345
7346 /*
7347 * Handler for more complex cases
7348 */
7349 GROW;
7350 c = CUR_CHAR(l);
7351 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007352 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007353 return(NULL);
7354 }
7355
7356 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007357 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007358 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007359 (IS_COMBINING(c)) ||
7360 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007361 if (count++ > 100) {
7362 count = 0;
7363 GROW;
7364 }
7365 len += l;
7366 NEXTL(l);
7367 c = CUR_CHAR(l);
7368 }
7369 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7370}
7371
7372/*
7373 * xmlGetNamespace:
7374 * @ctxt: an XML parser context
7375 * @prefix: the prefix to lookup
7376 *
7377 * Lookup the namespace name for the @prefix (which ca be NULL)
7378 * The prefix must come from the @ctxt->dict dictionnary
7379 *
7380 * Returns the namespace name or NULL if not bound
7381 */
7382static const xmlChar *
7383xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7384 int i;
7385
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007388 if (ctxt->nsTab[i] == prefix) {
7389 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7390 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007392 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007393 return(NULL);
7394}
7395
7396/**
7397 * xmlParseNCName:
7398 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007399 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 *
7401 * parse an XML name.
7402 *
7403 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7404 * CombiningChar | Extender
7405 *
7406 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7407 *
7408 * Returns the Name parsed or NULL
7409 */
7410
7411static const xmlChar *
7412xmlParseNCName(xmlParserCtxtPtr ctxt) {
7413 const xmlChar *in;
7414 const xmlChar *ret;
7415 int count = 0;
7416
7417 /*
7418 * Accelerator for simple ASCII names
7419 */
7420 in = ctxt->input->cur;
7421 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7422 ((*in >= 0x41) && (*in <= 0x5A)) ||
7423 (*in == '_')) {
7424 in++;
7425 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7426 ((*in >= 0x41) && (*in <= 0x5A)) ||
7427 ((*in >= 0x30) && (*in <= 0x39)) ||
7428 (*in == '_') || (*in == '-') ||
7429 (*in == '.'))
7430 in++;
7431 if ((*in > 0) && (*in < 0x80)) {
7432 count = in - ctxt->input->cur;
7433 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7434 ctxt->input->cur = in;
7435 ctxt->nbChars += count;
7436 ctxt->input->col += count;
7437 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007438 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439 }
7440 return(ret);
7441 }
7442 }
7443 return(xmlParseNCNameComplex(ctxt));
7444}
7445
7446/**
7447 * xmlParseQName:
7448 * @ctxt: an XML parser context
7449 * @prefix: pointer to store the prefix part
7450 *
7451 * parse an XML Namespace QName
7452 *
7453 * [6] QName ::= (Prefix ':')? LocalPart
7454 * [7] Prefix ::= NCName
7455 * [8] LocalPart ::= NCName
7456 *
7457 * Returns the Name parsed or NULL
7458 */
7459
7460static const xmlChar *
7461xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7462 const xmlChar *l, *p;
7463
7464 GROW;
7465
7466 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007467 if (l == NULL) {
7468 if (CUR == ':') {
7469 l = xmlParseName(ctxt);
7470 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007471 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7472 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007473 *prefix = NULL;
7474 return(l);
7475 }
7476 }
7477 return(NULL);
7478 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007479 if (CUR == ':') {
7480 NEXT;
7481 p = l;
7482 l = xmlParseNCName(ctxt);
7483 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007484 xmlChar *tmp;
7485
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007486 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7487 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007488 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7489 p = xmlDictLookup(ctxt->dict, tmp, -1);
7490 if (tmp != NULL) xmlFree(tmp);
7491 *prefix = NULL;
7492 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007493 }
7494 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007495 xmlChar *tmp;
7496
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007497 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7498 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007499 NEXT;
7500 tmp = (xmlChar *) xmlParseName(ctxt);
7501 if (tmp != NULL) {
7502 tmp = xmlBuildQName(tmp, l, NULL, 0);
7503 l = xmlDictLookup(ctxt->dict, tmp, -1);
7504 if (tmp != NULL) xmlFree(tmp);
7505 *prefix = p;
7506 return(l);
7507 }
7508 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7509 l = xmlDictLookup(ctxt->dict, tmp, -1);
7510 if (tmp != NULL) xmlFree(tmp);
7511 *prefix = p;
7512 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007513 }
7514 *prefix = p;
7515 } else
7516 *prefix = NULL;
7517 return(l);
7518}
7519
7520/**
7521 * xmlParseQNameAndCompare:
7522 * @ctxt: an XML parser context
7523 * @name: the localname
7524 * @prefix: the prefix, if any.
7525 *
7526 * parse an XML name and compares for match
7527 * (specialized for endtag parsing)
7528 *
7529 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7530 * and the name for mismatch
7531 */
7532
7533static const xmlChar *
7534xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7535 xmlChar const *prefix) {
7536 const xmlChar *cmp = name;
7537 const xmlChar *in;
7538 const xmlChar *ret;
7539 const xmlChar *prefix2;
7540
7541 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7542
7543 GROW;
7544 in = ctxt->input->cur;
7545
7546 cmp = prefix;
7547 while (*in != 0 && *in == *cmp) {
7548 ++in;
7549 ++cmp;
7550 }
7551 if ((*cmp == 0) && (*in == ':')) {
7552 in++;
7553 cmp = name;
7554 while (*in != 0 && *in == *cmp) {
7555 ++in;
7556 ++cmp;
7557 }
William M. Brack76e95df2003-10-18 16:20:14 +00007558 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007559 /* success */
7560 ctxt->input->cur = in;
7561 return((const xmlChar*) 1);
7562 }
7563 }
7564 /*
7565 * all strings coms from the dictionary, equality can be done directly
7566 */
7567 ret = xmlParseQName (ctxt, &prefix2);
7568 if ((ret == name) && (prefix == prefix2))
7569 return((const xmlChar*) 1);
7570 return ret;
7571}
7572
7573/**
7574 * xmlParseAttValueInternal:
7575 * @ctxt: an XML parser context
7576 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007577 * @alloc: whether the attribute was reallocated as a new string
7578 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007579 *
7580 * parse a value for an attribute.
7581 * NOTE: if no normalization is needed, the routine will return pointers
7582 * directly from the data buffer.
7583 *
7584 * 3.3.3 Attribute-Value Normalization:
7585 * Before the value of an attribute is passed to the application or
7586 * checked for validity, the XML processor must normalize it as follows:
7587 * - a character reference is processed by appending the referenced
7588 * character to the attribute value
7589 * - an entity reference is processed by recursively processing the
7590 * replacement text of the entity
7591 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7592 * appending #x20 to the normalized value, except that only a single
7593 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7594 * parsed entity or the literal entity value of an internal parsed entity
7595 * - other characters are processed by appending them to the normalized value
7596 * If the declared value is not CDATA, then the XML processor must further
7597 * process the normalized attribute value by discarding any leading and
7598 * trailing space (#x20) characters, and by replacing sequences of space
7599 * (#x20) characters by a single space (#x20) character.
7600 * All attributes for which no declaration has been read should be treated
7601 * by a non-validating parser as if declared CDATA.
7602 *
7603 * Returns the AttValue parsed or NULL. The value has to be freed by the
7604 * caller if it was copied, this can be detected by val[*len] == 0.
7605 */
7606
7607static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007608xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7609 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007610{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007611 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007612 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007613 xmlChar *ret = NULL;
7614
7615 GROW;
7616 in = (xmlChar *) CUR_PTR;
7617 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007618 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007619 return (NULL);
7620 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007621 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007622
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007623 /*
7624 * try to handle in this routine the most common case where no
7625 * allocation of a new string is required and where content is
7626 * pure ASCII.
7627 */
7628 limit = *in++;
7629 end = ctxt->input->end;
7630 start = in;
7631 if (in >= end) {
7632 const xmlChar *oldbase = ctxt->input->base;
7633 GROW;
7634 if (oldbase != ctxt->input->base) {
7635 long delta = ctxt->input->base - oldbase;
7636 start = start + delta;
7637 in = in + delta;
7638 }
7639 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007640 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007641 if (normalize) {
7642 /*
7643 * Skip any leading spaces
7644 */
7645 while ((in < end) && (*in != limit) &&
7646 ((*in == 0x20) || (*in == 0x9) ||
7647 (*in == 0xA) || (*in == 0xD))) {
7648 in++;
7649 start = in;
7650 if (in >= end) {
7651 const xmlChar *oldbase = ctxt->input->base;
7652 GROW;
7653 if (oldbase != ctxt->input->base) {
7654 long delta = ctxt->input->base - oldbase;
7655 start = start + delta;
7656 in = in + delta;
7657 }
7658 end = ctxt->input->end;
7659 }
7660 }
7661 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7662 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7663 if ((*in++ == 0x20) && (*in == 0x20)) break;
7664 if (in >= end) {
7665 const xmlChar *oldbase = ctxt->input->base;
7666 GROW;
7667 if (oldbase != ctxt->input->base) {
7668 long delta = ctxt->input->base - oldbase;
7669 start = start + delta;
7670 in = in + delta;
7671 }
7672 end = ctxt->input->end;
7673 }
7674 }
7675 last = in;
7676 /*
7677 * skip the trailing blanks
7678 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007679 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007680 while ((in < end) && (*in != limit) &&
7681 ((*in == 0x20) || (*in == 0x9) ||
7682 (*in == 0xA) || (*in == 0xD))) {
7683 in++;
7684 if (in >= end) {
7685 const xmlChar *oldbase = ctxt->input->base;
7686 GROW;
7687 if (oldbase != ctxt->input->base) {
7688 long delta = ctxt->input->base - oldbase;
7689 start = start + delta;
7690 in = in + delta;
7691 last = last + delta;
7692 }
7693 end = ctxt->input->end;
7694 }
7695 }
7696 if (*in != limit) goto need_complex;
7697 } else {
7698 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7699 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7700 in++;
7701 if (in >= end) {
7702 const xmlChar *oldbase = ctxt->input->base;
7703 GROW;
7704 if (oldbase != ctxt->input->base) {
7705 long delta = ctxt->input->base - oldbase;
7706 start = start + delta;
7707 in = in + delta;
7708 }
7709 end = ctxt->input->end;
7710 }
7711 }
7712 last = in;
7713 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007714 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007715 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007716 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007717 *len = last - start;
7718 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007719 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007720 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007721 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007722 }
7723 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007724 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007725 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007726need_complex:
7727 if (alloc) *alloc = 1;
7728 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729}
7730
7731/**
7732 * xmlParseAttribute2:
7733 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007734 * @pref: the element prefix
7735 * @elem: the element name
7736 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007737 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007738 * @len: an int * to save the length of the attribute
7739 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007740 *
7741 * parse an attribute in the new SAX2 framework.
7742 *
7743 * Returns the attribute name, and the value in *value, .
7744 */
7745
7746static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007747xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7748 const xmlChar *pref, const xmlChar *elem,
7749 const xmlChar **prefix, xmlChar **value,
7750 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007751 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007752 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007753 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007754
7755 *value = NULL;
7756 GROW;
7757 name = xmlParseQName(ctxt, prefix);
7758 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007759 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7760 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007761 return(NULL);
7762 }
7763
7764 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007765 * get the type if needed
7766 */
7767 if (ctxt->attsSpecial != NULL) {
7768 int type;
7769
7770 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7771 pref, elem, *prefix, name);
7772 if (type != 0) normalize = 1;
7773 }
7774
7775 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007776 * read the value
7777 */
7778 SKIP_BLANKS;
7779 if (RAW == '=') {
7780 NEXT;
7781 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007782 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007783 ctxt->instate = XML_PARSER_CONTENT;
7784 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007785 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007786 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007787 return(NULL);
7788 }
7789
Daniel Veillardd8925572005-06-08 22:34:55 +00007790 if (*prefix == ctxt->str_xml) {
7791 /*
7792 * Check that xml:lang conforms to the specification
7793 * No more registered as an error, just generate a warning now
7794 * since this was deprecated in XML second edition
7795 */
7796 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7797 internal_val = xmlStrndup(val, *len);
7798 if (!xmlCheckLanguageID(internal_val)) {
7799 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7800 "Malformed value for xml:lang : %s\n",
7801 internal_val, NULL);
7802 }
7803 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007804
Daniel Veillardd8925572005-06-08 22:34:55 +00007805 /*
7806 * Check that xml:space conforms to the specification
7807 */
7808 if (xmlStrEqual(name, BAD_CAST "space")) {
7809 internal_val = xmlStrndup(val, *len);
7810 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7811 *(ctxt->space) = 0;
7812 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7813 *(ctxt->space) = 1;
7814 else {
7815 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007816"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007817 internal_val, NULL);
7818 }
7819 }
7820 if (internal_val) {
7821 xmlFree(internal_val);
7822 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007823 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007824
7825 *value = val;
7826 return(name);
7827}
7828
7829/**
7830 * xmlParseStartTag2:
7831 * @ctxt: an XML parser context
7832 *
7833 * parse a start of tag either for rule element or
7834 * EmptyElement. In both case we don't parse the tag closing chars.
7835 * This routine is called when running SAX2 parsing
7836 *
7837 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7838 *
7839 * [ WFC: Unique Att Spec ]
7840 * No attribute name may appear more than once in the same start-tag or
7841 * empty-element tag.
7842 *
7843 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7844 *
7845 * [ WFC: Unique Att Spec ]
7846 * No attribute name may appear more than once in the same start-tag or
7847 * empty-element tag.
7848 *
7849 * With namespace:
7850 *
7851 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7852 *
7853 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7854 *
7855 * Returns the element name parsed
7856 */
7857
7858static const xmlChar *
7859xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007860 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007861 const xmlChar *localname;
7862 const xmlChar *prefix;
7863 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007864 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007865 const xmlChar *nsname;
7866 xmlChar *attvalue;
7867 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007868 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007869 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007870 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007871 const xmlChar *base;
7872 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007873 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007874
7875 if (RAW != '<') return(NULL);
7876 NEXT1;
7877
7878 /*
7879 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7880 * point since the attribute values may be stored as pointers to
7881 * the buffer and calling SHRINK would destroy them !
7882 * The Shrinking is only possible once the full set of attribute
7883 * callbacks have been done.
7884 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007885reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007886 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007887 base = ctxt->input->base;
7888 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00007889 oldline = ctxt->input->line;
7890 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007891 nbatts = 0;
7892 nratts = 0;
7893 nbdef = 0;
7894 nbNs = 0;
7895 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007896 /* Forget any namespaces added during an earlier parse of this element. */
7897 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007898
7899 localname = xmlParseQName(ctxt, &prefix);
7900 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007901 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7902 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007903 return(NULL);
7904 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007905 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007906
7907 /*
7908 * Now parse the attributes, it ends up with the ending
7909 *
7910 * (S Attribute)* S?
7911 */
7912 SKIP_BLANKS;
7913 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007914 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007915
7916 while ((RAW != '>') &&
7917 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007918 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007919 const xmlChar *q = CUR_PTR;
7920 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007921 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007922
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007923 attname = xmlParseAttribute2(ctxt, prefix, localname,
7924 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00007925 if (ctxt->input->base != base) {
7926 if ((attvalue != NULL) && (alloc != 0))
7927 xmlFree(attvalue);
7928 attvalue = NULL;
7929 goto base_changed;
7930 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007931 if ((attname != NULL) && (attvalue != NULL)) {
7932 if (len < 0) len = xmlStrlen(attvalue);
7933 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007934 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7935 xmlURIPtr uri;
7936
7937 if (*URL != 0) {
7938 uri = xmlParseURI((const char *) URL);
7939 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007940 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7941 "xmlns: %s not a valid URI\n",
7942 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007943 } else {
7944 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007945 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7946 "xmlns: URI %s is not absolute\n",
7947 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007948 }
7949 xmlFreeURI(uri);
7950 }
7951 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007952 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007953 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007954 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007955 for (j = 1;j <= nbNs;j++)
7956 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7957 break;
7958 if (j <= nbNs)
7959 xmlErrAttributeDup(ctxt, NULL, attname);
7960 else
7961 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007962 if (alloc != 0) xmlFree(attvalue);
7963 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007964 continue;
7965 }
7966 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007967 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7968 xmlURIPtr uri;
7969
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007970 if (attname == ctxt->str_xml) {
7971 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007972 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7973 "xml namespace prefix mapped to wrong URI\n",
7974 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007975 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007976 /*
7977 * Do not keep a namespace definition node
7978 */
7979 if (alloc != 0) xmlFree(attvalue);
7980 SKIP_BLANKS;
7981 continue;
7982 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007983 uri = xmlParseURI((const char *) URL);
7984 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007985 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7986 "xmlns:%s: '%s' is not a valid URI\n",
7987 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007988 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007989 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007990 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7991 "xmlns:%s: URI %s is not absolute\n",
7992 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007993 }
7994 xmlFreeURI(uri);
7995 }
7996
Daniel Veillard0fb18932003-09-07 09:14:37 +00007997 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007998 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007999 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008000 for (j = 1;j <= nbNs;j++)
8001 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8002 break;
8003 if (j <= nbNs)
8004 xmlErrAttributeDup(ctxt, aprefix, attname);
8005 else
8006 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008007 if (alloc != 0) xmlFree(attvalue);
8008 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008009 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008010 continue;
8011 }
8012
8013 /*
8014 * Add the pair to atts
8015 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008016 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8017 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008018 if (attvalue[len] == 0)
8019 xmlFree(attvalue);
8020 goto failed;
8021 }
8022 maxatts = ctxt->maxatts;
8023 atts = ctxt->atts;
8024 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008025 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008026 atts[nbatts++] = attname;
8027 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008028 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008029 atts[nbatts++] = attvalue;
8030 attvalue += len;
8031 atts[nbatts++] = attvalue;
8032 /*
8033 * tag if some deallocation is needed
8034 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008035 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008036 } else {
8037 if ((attvalue != NULL) && (attvalue[len] == 0))
8038 xmlFree(attvalue);
8039 }
8040
8041failed:
8042
8043 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008044 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008045 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8046 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008047 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008048 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8049 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008050 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 }
8052 SKIP_BLANKS;
8053 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8054 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008055 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008056 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008057 break;
8058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008059 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008060 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008061 }
8062
Daniel Veillard0fb18932003-09-07 09:14:37 +00008063 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008064 * The attributes defaulting
8065 */
8066 if (ctxt->attsDefault != NULL) {
8067 xmlDefAttrsPtr defaults;
8068
8069 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8070 if (defaults != NULL) {
8071 for (i = 0;i < defaults->nbAttrs;i++) {
8072 attname = defaults->values[4 * i];
8073 aprefix = defaults->values[4 * i + 1];
8074
8075 /*
8076 * special work for namespaces defaulted defs
8077 */
8078 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8079 /*
8080 * check that it's not a defined namespace
8081 */
8082 for (j = 1;j <= nbNs;j++)
8083 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8084 break;
8085 if (j <= nbNs) continue;
8086
8087 nsname = xmlGetNamespace(ctxt, NULL);
8088 if (nsname != defaults->values[4 * i + 2]) {
8089 if (nsPush(ctxt, NULL,
8090 defaults->values[4 * i + 2]) > 0)
8091 nbNs++;
8092 }
8093 } else if (aprefix == ctxt->str_xmlns) {
8094 /*
8095 * check that it's not a defined namespace
8096 */
8097 for (j = 1;j <= nbNs;j++)
8098 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8099 break;
8100 if (j <= nbNs) continue;
8101
8102 nsname = xmlGetNamespace(ctxt, attname);
8103 if (nsname != defaults->values[2]) {
8104 if (nsPush(ctxt, attname,
8105 defaults->values[4 * i + 2]) > 0)
8106 nbNs++;
8107 }
8108 } else {
8109 /*
8110 * check that it's not a defined attribute
8111 */
8112 for (j = 0;j < nbatts;j+=5) {
8113 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8114 break;
8115 }
8116 if (j < nbatts) continue;
8117
8118 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8119 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008120 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008121 }
8122 maxatts = ctxt->maxatts;
8123 atts = ctxt->atts;
8124 }
8125 atts[nbatts++] = attname;
8126 atts[nbatts++] = aprefix;
8127 if (aprefix == NULL)
8128 atts[nbatts++] = NULL;
8129 else
8130 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8131 atts[nbatts++] = defaults->values[4 * i + 2];
8132 atts[nbatts++] = defaults->values[4 * i + 3];
8133 nbdef++;
8134 }
8135 }
8136 }
8137 }
8138
Daniel Veillarde70c8772003-11-25 07:21:18 +00008139 /*
8140 * The attributes checkings
8141 */
8142 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008143 /*
8144 * The default namespace does not apply to attribute names.
8145 */
8146 if (atts[i + 1] != NULL) {
8147 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8148 if (nsname == NULL) {
8149 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8150 "Namespace prefix %s for %s on %s is not defined\n",
8151 atts[i + 1], atts[i], localname);
8152 }
8153 atts[i + 2] = nsname;
8154 } else
8155 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008156 /*
8157 * [ WFC: Unique Att Spec ]
8158 * No attribute name may appear more than once in the same
8159 * start-tag or empty-element tag.
8160 * As extended by the Namespace in XML REC.
8161 */
8162 for (j = 0; j < i;j += 5) {
8163 if (atts[i] == atts[j]) {
8164 if (atts[i+1] == atts[j+1]) {
8165 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8166 break;
8167 }
8168 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8169 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8170 "Namespaced Attribute %s in '%s' redefined\n",
8171 atts[i], nsname, NULL);
8172 break;
8173 }
8174 }
8175 }
8176 }
8177
Daniel Veillarde57ec792003-09-10 10:50:59 +00008178 nsname = xmlGetNamespace(ctxt, prefix);
8179 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008180 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8181 "Namespace prefix %s on %s is not defined\n",
8182 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008183 }
8184 *pref = prefix;
8185 *URI = nsname;
8186
8187 /*
8188 * SAX: Start of Element !
8189 */
8190 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8191 (!ctxt->disableSAX)) {
8192 if (nbNs > 0)
8193 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8194 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8195 nbatts / 5, nbdef, atts);
8196 else
8197 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8198 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8199 }
8200
8201 /*
8202 * Free up attribute allocated strings if needed
8203 */
8204 if (attval != 0) {
8205 for (i = 3,j = 0; j < nratts;i += 5,j++)
8206 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8207 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008208 }
8209
8210 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008211
8212base_changed:
8213 /*
8214 * the attribute strings are valid iif the base didn't changed
8215 */
8216 if (attval != 0) {
8217 for (i = 3,j = 0; j < nratts;i += 5,j++)
8218 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8219 xmlFree((xmlChar *) atts[i]);
8220 }
8221 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008222 ctxt->input->line = oldline;
8223 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008224 if (ctxt->wellFormed == 1) {
8225 goto reparse;
8226 }
8227 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008228}
8229
8230/**
8231 * xmlParseEndTag2:
8232 * @ctxt: an XML parser context
8233 * @line: line of the start tag
8234 * @nsNr: number of namespaces on the start tag
8235 *
8236 * parse an end of tag
8237 *
8238 * [42] ETag ::= '</' Name S? '>'
8239 *
8240 * With namespace
8241 *
8242 * [NS 9] ETag ::= '</' QName S? '>'
8243 */
8244
8245static void
8246xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008247 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008248 const xmlChar *name;
8249
8250 GROW;
8251 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008252 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008253 return;
8254 }
8255 SKIP(2);
8256
William M. Brack13dfa872004-09-18 04:52:08 +00008257 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008258 if (ctxt->input->cur[tlen] == '>') {
8259 ctxt->input->cur += tlen + 1;
8260 goto done;
8261 }
8262 ctxt->input->cur += tlen;
8263 name = (xmlChar*)1;
8264 } else {
8265 if (prefix == NULL)
8266 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8267 else
8268 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8269 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008270
8271 /*
8272 * We should definitely be at the ending "S? '>'" part
8273 */
8274 GROW;
8275 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008276 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008277 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008278 } else
8279 NEXT1;
8280
8281 /*
8282 * [ WFC: Element Type Match ]
8283 * The Name in an element's end-tag must match the element type in the
8284 * start-tag.
8285 *
8286 */
8287 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008288 if (name == NULL) name = BAD_CAST "unparseable";
8289 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008290 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008291 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008292 }
8293
8294 /*
8295 * SAX: End of Tag
8296 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008297done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008298 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8299 (!ctxt->disableSAX))
8300 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8301
Daniel Veillard0fb18932003-09-07 09:14:37 +00008302 spacePop(ctxt);
8303 if (nsNr != 0)
8304 nsPop(ctxt, nsNr);
8305 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008306}
8307
8308/**
Owen Taylor3473f882001-02-23 17:55:21 +00008309 * xmlParseCDSect:
8310 * @ctxt: an XML parser context
8311 *
8312 * Parse escaped pure raw content.
8313 *
8314 * [18] CDSect ::= CDStart CData CDEnd
8315 *
8316 * [19] CDStart ::= '<![CDATA['
8317 *
8318 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8319 *
8320 * [21] CDEnd ::= ']]>'
8321 */
8322void
8323xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8324 xmlChar *buf = NULL;
8325 int len = 0;
8326 int size = XML_PARSER_BUFFER_SIZE;
8327 int r, rl;
8328 int s, sl;
8329 int cur, l;
8330 int count = 0;
8331
Daniel Veillard8f597c32003-10-06 08:19:27 +00008332 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008333 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008334 SKIP(9);
8335 } else
8336 return;
8337
8338 ctxt->instate = XML_PARSER_CDATA_SECTION;
8339 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008340 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008341 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008342 ctxt->instate = XML_PARSER_CONTENT;
8343 return;
8344 }
8345 NEXTL(rl);
8346 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008347 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008348 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008349 ctxt->instate = XML_PARSER_CONTENT;
8350 return;
8351 }
8352 NEXTL(sl);
8353 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008354 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008355 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008356 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008357 return;
8358 }
William M. Brack871611b2003-10-18 04:53:14 +00008359 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008360 ((r != ']') || (s != ']') || (cur != '>'))) {
8361 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008362 xmlChar *tmp;
8363
Owen Taylor3473f882001-02-23 17:55:21 +00008364 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008365 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8366 if (tmp == NULL) {
8367 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008368 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008369 return;
8370 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008371 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008372 }
8373 COPY_BUF(rl,buf,len,r);
8374 r = s;
8375 rl = sl;
8376 s = cur;
8377 sl = l;
8378 count++;
8379 if (count > 50) {
8380 GROW;
8381 count = 0;
8382 }
8383 NEXTL(l);
8384 cur = CUR_CHAR(l);
8385 }
8386 buf[len] = 0;
8387 ctxt->instate = XML_PARSER_CONTENT;
8388 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008389 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008390 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008391 xmlFree(buf);
8392 return;
8393 }
8394 NEXTL(l);
8395
8396 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008397 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008398 */
8399 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8400 if (ctxt->sax->cdataBlock != NULL)
8401 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008402 else if (ctxt->sax->characters != NULL)
8403 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008404 }
8405 xmlFree(buf);
8406}
8407
8408/**
8409 * xmlParseContent:
8410 * @ctxt: an XML parser context
8411 *
8412 * Parse a content:
8413 *
8414 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8415 */
8416
8417void
8418xmlParseContent(xmlParserCtxtPtr ctxt) {
8419 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008420 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008421 ((RAW != '<') || (NXT(1) != '/')) &&
8422 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008423 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008424 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008425 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008426
8427 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008428 * First case : a Processing Instruction.
8429 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008430 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008431 xmlParsePI(ctxt);
8432 }
8433
8434 /*
8435 * Second case : a CDSection
8436 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008437 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008438 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008439 xmlParseCDSect(ctxt);
8440 }
8441
8442 /*
8443 * Third case : a comment
8444 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008445 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008446 (NXT(2) == '-') && (NXT(3) == '-')) {
8447 xmlParseComment(ctxt);
8448 ctxt->instate = XML_PARSER_CONTENT;
8449 }
8450
8451 /*
8452 * Fourth case : a sub-element.
8453 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008454 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008455 xmlParseElement(ctxt);
8456 }
8457
8458 /*
8459 * Fifth case : a reference. If if has not been resolved,
8460 * parsing returns it's Name, create the node
8461 */
8462
Daniel Veillard21a0f912001-02-25 19:54:14 +00008463 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008464 xmlParseReference(ctxt);
8465 }
8466
8467 /*
8468 * Last case, text. Note that References are handled directly.
8469 */
8470 else {
8471 xmlParseCharData(ctxt, 0);
8472 }
8473
8474 GROW;
8475 /*
8476 * Pop-up of finished entities.
8477 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008478 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008479 xmlPopInput(ctxt);
8480 SHRINK;
8481
Daniel Veillardfdc91562002-07-01 21:52:03 +00008482 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008483 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8484 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008485 ctxt->instate = XML_PARSER_EOF;
8486 break;
8487 }
8488 }
8489}
8490
8491/**
8492 * xmlParseElement:
8493 * @ctxt: an XML parser context
8494 *
8495 * parse an XML element, this is highly recursive
8496 *
8497 * [39] element ::= EmptyElemTag | STag content ETag
8498 *
8499 * [ WFC: Element Type Match ]
8500 * The Name in an element's end-tag must match the element type in the
8501 * start-tag.
8502 *
Owen Taylor3473f882001-02-23 17:55:21 +00008503 */
8504
8505void
8506xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008507 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008508 const xmlChar *prefix;
8509 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008511 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008512 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008513 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008514
Daniel Veillard4a9fe382006-09-19 12:44:35 +00008515 if ((unsigned int) ctxt->nameNr > xmlParserMaxDepth) {
8516 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
8517 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
8518 xmlParserMaxDepth);
8519 ctxt->instate = XML_PARSER_EOF;
8520 return;
8521 }
8522
Owen Taylor3473f882001-02-23 17:55:21 +00008523 /* Capture start position */
8524 if (ctxt->record_info) {
8525 node_info.begin_pos = ctxt->input->consumed +
8526 (CUR_PTR - ctxt->input->base);
8527 node_info.begin_line = ctxt->input->line;
8528 }
8529
8530 if (ctxt->spaceNr == 0)
8531 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00008532 else if (*ctxt->space == -2)
8533 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00008534 else
8535 spacePush(ctxt, *ctxt->space);
8536
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008537 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008538#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008539 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008540#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008541 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008542#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 else
8544 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008545#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008546 if (name == NULL) {
8547 spacePop(ctxt);
8548 return;
8549 }
8550 namePush(ctxt, name);
8551 ret = ctxt->node;
8552
Daniel Veillard4432df22003-09-28 18:58:27 +00008553#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008554 /*
8555 * [ VC: Root Element Type ]
8556 * The Name in the document type declaration must match the element
8557 * type of the root element.
8558 */
8559 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8560 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8561 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008562#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008563
8564 /*
8565 * Check for an Empty Element.
8566 */
8567 if ((RAW == '/') && (NXT(1) == '>')) {
8568 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569 if (ctxt->sax2) {
8570 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8571 (!ctxt->disableSAX))
8572 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008573#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008574 } else {
8575 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8576 (!ctxt->disableSAX))
8577 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008578#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008579 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008580 namePop(ctxt);
8581 spacePop(ctxt);
8582 if (nsNr != ctxt->nsNr)
8583 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008584 if ( ret != NULL && ctxt->record_info ) {
8585 node_info.end_pos = ctxt->input->consumed +
8586 (CUR_PTR - ctxt->input->base);
8587 node_info.end_line = ctxt->input->line;
8588 node_info.node = ret;
8589 xmlParserAddNodeInfo(ctxt, &node_info);
8590 }
8591 return;
8592 }
8593 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008594 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008595 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008596 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8597 "Couldn't find end of Start Tag %s line %d\n",
8598 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599
8600 /*
8601 * end of parsing of this node.
8602 */
8603 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008604 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008605 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008606 if (nsNr != ctxt->nsNr)
8607 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008608
8609 /*
8610 * Capture end position and add node
8611 */
8612 if ( ret != NULL && ctxt->record_info ) {
8613 node_info.end_pos = ctxt->input->consumed +
8614 (CUR_PTR - ctxt->input->base);
8615 node_info.end_line = ctxt->input->line;
8616 node_info.node = ret;
8617 xmlParserAddNodeInfo(ctxt, &node_info);
8618 }
8619 return;
8620 }
8621
8622 /*
8623 * Parse the content of the element:
8624 */
8625 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008626 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008627 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008628 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008629 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008630
8631 /*
8632 * end of parsing of this node.
8633 */
8634 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008636 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 if (nsNr != ctxt->nsNr)
8638 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008639 return;
8640 }
8641
8642 /*
8643 * parse the end of tag: '</' should be here.
8644 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008645 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008646 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008647 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008648 }
8649#ifdef LIBXML_SAX1_ENABLED
8650 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008651 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008652#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008653
8654 /*
8655 * Capture end position and add node
8656 */
8657 if ( ret != NULL && ctxt->record_info ) {
8658 node_info.end_pos = ctxt->input->consumed +
8659 (CUR_PTR - ctxt->input->base);
8660 node_info.end_line = ctxt->input->line;
8661 node_info.node = ret;
8662 xmlParserAddNodeInfo(ctxt, &node_info);
8663 }
8664}
8665
8666/**
8667 * xmlParseVersionNum:
8668 * @ctxt: an XML parser context
8669 *
8670 * parse the XML version value.
8671 *
8672 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8673 *
8674 * Returns the string giving the XML version number, or NULL
8675 */
8676xmlChar *
8677xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8678 xmlChar *buf = NULL;
8679 int len = 0;
8680 int size = 10;
8681 xmlChar cur;
8682
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008683 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008684 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008685 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008686 return(NULL);
8687 }
8688 cur = CUR;
8689 while (((cur >= 'a') && (cur <= 'z')) ||
8690 ((cur >= 'A') && (cur <= 'Z')) ||
8691 ((cur >= '0') && (cur <= '9')) ||
8692 (cur == '_') || (cur == '.') ||
8693 (cur == ':') || (cur == '-')) {
8694 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008695 xmlChar *tmp;
8696
Owen Taylor3473f882001-02-23 17:55:21 +00008697 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008698 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8699 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008700 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008701 return(NULL);
8702 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008703 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008704 }
8705 buf[len++] = cur;
8706 NEXT;
8707 cur=CUR;
8708 }
8709 buf[len] = 0;
8710 return(buf);
8711}
8712
8713/**
8714 * xmlParseVersionInfo:
8715 * @ctxt: an XML parser context
8716 *
8717 * parse the XML version.
8718 *
8719 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8720 *
8721 * [25] Eq ::= S? '=' S?
8722 *
8723 * Returns the version string, e.g. "1.0"
8724 */
8725
8726xmlChar *
8727xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8728 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008729
Daniel Veillarda07050d2003-10-19 14:46:32 +00008730 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008731 SKIP(7);
8732 SKIP_BLANKS;
8733 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008734 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008735 return(NULL);
8736 }
8737 NEXT;
8738 SKIP_BLANKS;
8739 if (RAW == '"') {
8740 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008741 version = xmlParseVersionNum(ctxt);
8742 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008743 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008744 } else
8745 NEXT;
8746 } else if (RAW == '\''){
8747 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008748 version = xmlParseVersionNum(ctxt);
8749 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008750 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008751 } else
8752 NEXT;
8753 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008754 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008755 }
8756 }
8757 return(version);
8758}
8759
8760/**
8761 * xmlParseEncName:
8762 * @ctxt: an XML parser context
8763 *
8764 * parse the XML encoding name
8765 *
8766 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8767 *
8768 * Returns the encoding name value or NULL
8769 */
8770xmlChar *
8771xmlParseEncName(xmlParserCtxtPtr ctxt) {
8772 xmlChar *buf = NULL;
8773 int len = 0;
8774 int size = 10;
8775 xmlChar cur;
8776
8777 cur = CUR;
8778 if (((cur >= 'a') && (cur <= 'z')) ||
8779 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008780 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008781 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008782 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008783 return(NULL);
8784 }
8785
8786 buf[len++] = cur;
8787 NEXT;
8788 cur = CUR;
8789 while (((cur >= 'a') && (cur <= 'z')) ||
8790 ((cur >= 'A') && (cur <= 'Z')) ||
8791 ((cur >= '0') && (cur <= '9')) ||
8792 (cur == '.') || (cur == '_') ||
8793 (cur == '-')) {
8794 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008795 xmlChar *tmp;
8796
Owen Taylor3473f882001-02-23 17:55:21 +00008797 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008798 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8799 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008800 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008801 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008802 return(NULL);
8803 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008804 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008805 }
8806 buf[len++] = cur;
8807 NEXT;
8808 cur = CUR;
8809 if (cur == 0) {
8810 SHRINK;
8811 GROW;
8812 cur = CUR;
8813 }
8814 }
8815 buf[len] = 0;
8816 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008817 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008818 }
8819 return(buf);
8820}
8821
8822/**
8823 * xmlParseEncodingDecl:
8824 * @ctxt: an XML parser context
8825 *
8826 * parse the XML encoding declaration
8827 *
8828 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8829 *
8830 * this setups the conversion filters.
8831 *
8832 * Returns the encoding value or NULL
8833 */
8834
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008835const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008836xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8837 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008838
8839 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008840 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008841 SKIP(8);
8842 SKIP_BLANKS;
8843 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008844 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008845 return(NULL);
8846 }
8847 NEXT;
8848 SKIP_BLANKS;
8849 if (RAW == '"') {
8850 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008851 encoding = xmlParseEncName(ctxt);
8852 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008853 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008854 } else
8855 NEXT;
8856 } else if (RAW == '\''){
8857 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008858 encoding = xmlParseEncName(ctxt);
8859 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008860 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008861 } else
8862 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008863 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008864 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008865 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008866 /*
8867 * UTF-16 encoding stwich has already taken place at this stage,
8868 * more over the little-endian/big-endian selection is already done
8869 */
8870 if ((encoding != NULL) &&
8871 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8872 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008873 if (ctxt->encoding != NULL)
8874 xmlFree((xmlChar *) ctxt->encoding);
8875 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008876 }
8877 /*
8878 * UTF-8 encoding is handled natively
8879 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008880 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008881 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8882 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008883 if (ctxt->encoding != NULL)
8884 xmlFree((xmlChar *) ctxt->encoding);
8885 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008886 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008887 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008888 xmlCharEncodingHandlerPtr handler;
8889
8890 if (ctxt->input->encoding != NULL)
8891 xmlFree((xmlChar *) ctxt->input->encoding);
8892 ctxt->input->encoding = encoding;
8893
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008894 handler = xmlFindCharEncodingHandler((const char *) encoding);
8895 if (handler != NULL) {
8896 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008897 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008898 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008899 "Unsupported encoding %s\n", encoding);
8900 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008901 }
8902 }
8903 }
8904 return(encoding);
8905}
8906
8907/**
8908 * xmlParseSDDecl:
8909 * @ctxt: an XML parser context
8910 *
8911 * parse the XML standalone declaration
8912 *
8913 * [32] SDDecl ::= S 'standalone' Eq
8914 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8915 *
8916 * [ VC: Standalone Document Declaration ]
8917 * TODO The standalone document declaration must have the value "no"
8918 * if any external markup declarations contain declarations of:
8919 * - attributes with default values, if elements to which these
8920 * attributes apply appear in the document without specifications
8921 * of values for these attributes, or
8922 * - entities (other than amp, lt, gt, apos, quot), if references
8923 * to those entities appear in the document, or
8924 * - attributes with values subject to normalization, where the
8925 * attribute appears in the document with a value which will change
8926 * as a result of normalization, or
8927 * - element types with element content, if white space occurs directly
8928 * within any instance of those types.
8929 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008930 * Returns:
8931 * 1 if standalone="yes"
8932 * 0 if standalone="no"
8933 * -2 if standalone attribute is missing or invalid
8934 * (A standalone value of -2 means that the XML declaration was found,
8935 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00008936 */
8937
8938int
8939xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00008940 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00008941
8942 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008943 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008944 SKIP(10);
8945 SKIP_BLANKS;
8946 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008947 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008948 return(standalone);
8949 }
8950 NEXT;
8951 SKIP_BLANKS;
8952 if (RAW == '\''){
8953 NEXT;
8954 if ((RAW == 'n') && (NXT(1) == 'o')) {
8955 standalone = 0;
8956 SKIP(2);
8957 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8958 (NXT(2) == 's')) {
8959 standalone = 1;
8960 SKIP(3);
8961 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008962 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008963 }
8964 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008965 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008966 } else
8967 NEXT;
8968 } else if (RAW == '"'){
8969 NEXT;
8970 if ((RAW == 'n') && (NXT(1) == 'o')) {
8971 standalone = 0;
8972 SKIP(2);
8973 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8974 (NXT(2) == 's')) {
8975 standalone = 1;
8976 SKIP(3);
8977 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008978 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008979 }
8980 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008981 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008982 } else
8983 NEXT;
8984 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008985 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008986 }
8987 }
8988 return(standalone);
8989}
8990
8991/**
8992 * xmlParseXMLDecl:
8993 * @ctxt: an XML parser context
8994 *
8995 * parse an XML declaration header
8996 *
8997 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8998 */
8999
9000void
9001xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9002 xmlChar *version;
9003
9004 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009005 * This value for standalone indicates that the document has an
9006 * XML declaration but it does not have a standalone attribute.
9007 * It will be overwritten later if a standalone attribute is found.
9008 */
9009 ctxt->input->standalone = -2;
9010
9011 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009012 * We know that '<?xml' is here.
9013 */
9014 SKIP(5);
9015
William M. Brack76e95df2003-10-18 16:20:14 +00009016 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009017 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9018 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009019 }
9020 SKIP_BLANKS;
9021
9022 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009023 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009024 */
9025 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009026 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009027 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009028 } else {
9029 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9030 /*
9031 * TODO: Blueberry should be detected here
9032 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00009033 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9034 "Unsupported version '%s'\n",
9035 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009036 }
9037 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009038 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009039 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009040 }
Owen Taylor3473f882001-02-23 17:55:21 +00009041
9042 /*
9043 * We may have the encoding declaration
9044 */
William M. Brack76e95df2003-10-18 16:20:14 +00009045 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009046 if ((RAW == '?') && (NXT(1) == '>')) {
9047 SKIP(2);
9048 return;
9049 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009050 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009051 }
9052 xmlParseEncodingDecl(ctxt);
9053 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9054 /*
9055 * The XML REC instructs us to stop parsing right here
9056 */
9057 return;
9058 }
9059
9060 /*
9061 * We may have the standalone status.
9062 */
William M. Brack76e95df2003-10-18 16:20:14 +00009063 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009064 if ((RAW == '?') && (NXT(1) == '>')) {
9065 SKIP(2);
9066 return;
9067 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009068 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009069 }
9070 SKIP_BLANKS;
9071 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9072
9073 SKIP_BLANKS;
9074 if ((RAW == '?') && (NXT(1) == '>')) {
9075 SKIP(2);
9076 } else if (RAW == '>') {
9077 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009078 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009079 NEXT;
9080 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009081 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009082 MOVETO_ENDTAG(CUR_PTR);
9083 NEXT;
9084 }
9085}
9086
9087/**
9088 * xmlParseMisc:
9089 * @ctxt: an XML parser context
9090 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009091 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009092 *
9093 * [27] Misc ::= Comment | PI | S
9094 */
9095
9096void
9097xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009098 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009099 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009100 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009101 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009102 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009103 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009104 NEXT;
9105 } else
9106 xmlParseComment(ctxt);
9107 }
9108}
9109
9110/**
9111 * xmlParseDocument:
9112 * @ctxt: an XML parser context
9113 *
9114 * parse an XML document (and build a tree if using the standard SAX
9115 * interface).
9116 *
9117 * [1] document ::= prolog element Misc*
9118 *
9119 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9120 *
9121 * Returns 0, -1 in case of error. the parser context is augmented
9122 * as a result of the parsing.
9123 */
9124
9125int
9126xmlParseDocument(xmlParserCtxtPtr ctxt) {
9127 xmlChar start[4];
9128 xmlCharEncoding enc;
9129
9130 xmlInitParser();
9131
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009132 if ((ctxt == NULL) || (ctxt->input == NULL))
9133 return(-1);
9134
Owen Taylor3473f882001-02-23 17:55:21 +00009135 GROW;
9136
9137 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009138 * SAX: detecting the level.
9139 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009140 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009141
9142 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009143 * SAX: beginning of the document processing.
9144 */
9145 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9146 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9147
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009148 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9149 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009150 /*
9151 * Get the 4 first bytes and decode the charset
9152 * if enc != XML_CHAR_ENCODING_NONE
9153 * plug some encoding conversion routines.
9154 */
9155 start[0] = RAW;
9156 start[1] = NXT(1);
9157 start[2] = NXT(2);
9158 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009159 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009160 if (enc != XML_CHAR_ENCODING_NONE) {
9161 xmlSwitchEncoding(ctxt, enc);
9162 }
Owen Taylor3473f882001-02-23 17:55:21 +00009163 }
9164
9165
9166 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009167 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009168 }
9169
9170 /*
9171 * Check for the XMLDecl in the Prolog.
9172 */
9173 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009174 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009175
9176 /*
9177 * Note that we will switch encoding on the fly.
9178 */
9179 xmlParseXMLDecl(ctxt);
9180 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9181 /*
9182 * The XML REC instructs us to stop parsing right here
9183 */
9184 return(-1);
9185 }
9186 ctxt->standalone = ctxt->input->standalone;
9187 SKIP_BLANKS;
9188 } else {
9189 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9190 }
9191 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9192 ctxt->sax->startDocument(ctxt->userData);
9193
9194 /*
9195 * The Misc part of the Prolog
9196 */
9197 GROW;
9198 xmlParseMisc(ctxt);
9199
9200 /*
9201 * Then possibly doc type declaration(s) and more Misc
9202 * (doctypedecl Misc*)?
9203 */
9204 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009205 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009206
9207 ctxt->inSubset = 1;
9208 xmlParseDocTypeDecl(ctxt);
9209 if (RAW == '[') {
9210 ctxt->instate = XML_PARSER_DTD;
9211 xmlParseInternalSubset(ctxt);
9212 }
9213
9214 /*
9215 * Create and update the external subset.
9216 */
9217 ctxt->inSubset = 2;
9218 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9219 (!ctxt->disableSAX))
9220 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9221 ctxt->extSubSystem, ctxt->extSubURI);
9222 ctxt->inSubset = 0;
9223
9224
9225 ctxt->instate = XML_PARSER_PROLOG;
9226 xmlParseMisc(ctxt);
9227 }
9228
9229 /*
9230 * Time to start parsing the tree itself
9231 */
9232 GROW;
9233 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009234 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9235 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009236 } else {
9237 ctxt->instate = XML_PARSER_CONTENT;
9238 xmlParseElement(ctxt);
9239 ctxt->instate = XML_PARSER_EPILOG;
9240
9241
9242 /*
9243 * The Misc part at the end
9244 */
9245 xmlParseMisc(ctxt);
9246
Daniel Veillard561b7f82002-03-20 21:55:57 +00009247 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009248 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009249 }
9250 ctxt->instate = XML_PARSER_EOF;
9251 }
9252
9253 /*
9254 * SAX: end of the document processing.
9255 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009256 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009257 ctxt->sax->endDocument(ctxt->userData);
9258
Daniel Veillard5997aca2002-03-18 18:36:20 +00009259 /*
9260 * Remove locally kept entity definitions if the tree was not built
9261 */
9262 if ((ctxt->myDoc != NULL) &&
9263 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9264 xmlFreeDoc(ctxt->myDoc);
9265 ctxt->myDoc = NULL;
9266 }
9267
Daniel Veillardc7612992002-02-17 22:47:37 +00009268 if (! ctxt->wellFormed) {
9269 ctxt->valid = 0;
9270 return(-1);
9271 }
Owen Taylor3473f882001-02-23 17:55:21 +00009272 return(0);
9273}
9274
9275/**
9276 * xmlParseExtParsedEnt:
9277 * @ctxt: an XML parser context
9278 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009279 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009280 * An external general parsed entity is well-formed if it matches the
9281 * production labeled extParsedEnt.
9282 *
9283 * [78] extParsedEnt ::= TextDecl? content
9284 *
9285 * Returns 0, -1 in case of error. the parser context is augmented
9286 * as a result of the parsing.
9287 */
9288
9289int
9290xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9291 xmlChar start[4];
9292 xmlCharEncoding enc;
9293
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009294 if ((ctxt == NULL) || (ctxt->input == NULL))
9295 return(-1);
9296
Owen Taylor3473f882001-02-23 17:55:21 +00009297 xmlDefaultSAXHandlerInit();
9298
Daniel Veillard309f81d2003-09-23 09:02:53 +00009299 xmlDetectSAX2(ctxt);
9300
Owen Taylor3473f882001-02-23 17:55:21 +00009301 GROW;
9302
9303 /*
9304 * SAX: beginning of the document processing.
9305 */
9306 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9307 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9308
9309 /*
9310 * Get the 4 first bytes and decode the charset
9311 * if enc != XML_CHAR_ENCODING_NONE
9312 * plug some encoding conversion routines.
9313 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009314 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9315 start[0] = RAW;
9316 start[1] = NXT(1);
9317 start[2] = NXT(2);
9318 start[3] = NXT(3);
9319 enc = xmlDetectCharEncoding(start, 4);
9320 if (enc != XML_CHAR_ENCODING_NONE) {
9321 xmlSwitchEncoding(ctxt, enc);
9322 }
Owen Taylor3473f882001-02-23 17:55:21 +00009323 }
9324
9325
9326 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009327 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009328 }
9329
9330 /*
9331 * Check for the XMLDecl in the Prolog.
9332 */
9333 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009334 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009335
9336 /*
9337 * Note that we will switch encoding on the fly.
9338 */
9339 xmlParseXMLDecl(ctxt);
9340 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9341 /*
9342 * The XML REC instructs us to stop parsing right here
9343 */
9344 return(-1);
9345 }
9346 SKIP_BLANKS;
9347 } else {
9348 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9349 }
9350 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9351 ctxt->sax->startDocument(ctxt->userData);
9352
9353 /*
9354 * Doing validity checking on chunk doesn't make sense
9355 */
9356 ctxt->instate = XML_PARSER_CONTENT;
9357 ctxt->validate = 0;
9358 ctxt->loadsubset = 0;
9359 ctxt->depth = 0;
9360
9361 xmlParseContent(ctxt);
9362
9363 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009364 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009365 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009366 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009367 }
9368
9369 /*
9370 * SAX: end of the document processing.
9371 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009372 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009373 ctxt->sax->endDocument(ctxt->userData);
9374
9375 if (! ctxt->wellFormed) return(-1);
9376 return(0);
9377}
9378
Daniel Veillard73b013f2003-09-30 12:36:01 +00009379#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009380/************************************************************************
9381 * *
9382 * Progressive parsing interfaces *
9383 * *
9384 ************************************************************************/
9385
9386/**
9387 * xmlParseLookupSequence:
9388 * @ctxt: an XML parser context
9389 * @first: the first char to lookup
9390 * @next: the next char to lookup or zero
9391 * @third: the next char to lookup or zero
9392 *
9393 * Try to find if a sequence (first, next, third) or just (first next) or
9394 * (first) is available in the input stream.
9395 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9396 * to avoid rescanning sequences of bytes, it DOES change the state of the
9397 * parser, do not use liberally.
9398 *
9399 * Returns the index to the current parsing point if the full sequence
9400 * is available, -1 otherwise.
9401 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009402static int
Owen Taylor3473f882001-02-23 17:55:21 +00009403xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9404 xmlChar next, xmlChar third) {
9405 int base, len;
9406 xmlParserInputPtr in;
9407 const xmlChar *buf;
9408
9409 in = ctxt->input;
9410 if (in == NULL) return(-1);
9411 base = in->cur - in->base;
9412 if (base < 0) return(-1);
9413 if (ctxt->checkIndex > base)
9414 base = ctxt->checkIndex;
9415 if (in->buf == NULL) {
9416 buf = in->base;
9417 len = in->length;
9418 } else {
9419 buf = in->buf->buffer->content;
9420 len = in->buf->buffer->use;
9421 }
9422 /* take into account the sequence length */
9423 if (third) len -= 2;
9424 else if (next) len --;
9425 for (;base < len;base++) {
9426 if (buf[base] == first) {
9427 if (third != 0) {
9428 if ((buf[base + 1] != next) ||
9429 (buf[base + 2] != third)) continue;
9430 } else if (next != 0) {
9431 if (buf[base + 1] != next) continue;
9432 }
9433 ctxt->checkIndex = 0;
9434#ifdef DEBUG_PUSH
9435 if (next == 0)
9436 xmlGenericError(xmlGenericErrorContext,
9437 "PP: lookup '%c' found at %d\n",
9438 first, base);
9439 else if (third == 0)
9440 xmlGenericError(xmlGenericErrorContext,
9441 "PP: lookup '%c%c' found at %d\n",
9442 first, next, base);
9443 else
9444 xmlGenericError(xmlGenericErrorContext,
9445 "PP: lookup '%c%c%c' found at %d\n",
9446 first, next, third, base);
9447#endif
9448 return(base - (in->cur - in->base));
9449 }
9450 }
9451 ctxt->checkIndex = base;
9452#ifdef DEBUG_PUSH
9453 if (next == 0)
9454 xmlGenericError(xmlGenericErrorContext,
9455 "PP: lookup '%c' failed\n", first);
9456 else if (third == 0)
9457 xmlGenericError(xmlGenericErrorContext,
9458 "PP: lookup '%c%c' failed\n", first, next);
9459 else
9460 xmlGenericError(xmlGenericErrorContext,
9461 "PP: lookup '%c%c%c' failed\n", first, next, third);
9462#endif
9463 return(-1);
9464}
9465
9466/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009467 * xmlParseGetLasts:
9468 * @ctxt: an XML parser context
9469 * @lastlt: pointer to store the last '<' from the input
9470 * @lastgt: pointer to store the last '>' from the input
9471 *
9472 * Lookup the last < and > in the current chunk
9473 */
9474static void
9475xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9476 const xmlChar **lastgt) {
9477 const xmlChar *tmp;
9478
9479 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9480 xmlGenericError(xmlGenericErrorContext,
9481 "Internal error: xmlParseGetLasts\n");
9482 return;
9483 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009484 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009485 tmp = ctxt->input->end;
9486 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009487 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009488 if (tmp < ctxt->input->base) {
9489 *lastlt = NULL;
9490 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009491 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009492 *lastlt = tmp;
9493 tmp++;
9494 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9495 if (*tmp == '\'') {
9496 tmp++;
9497 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9498 if (tmp < ctxt->input->end) tmp++;
9499 } else if (*tmp == '"') {
9500 tmp++;
9501 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9502 if (tmp < ctxt->input->end) tmp++;
9503 } else
9504 tmp++;
9505 }
9506 if (tmp < ctxt->input->end)
9507 *lastgt = tmp;
9508 else {
9509 tmp = *lastlt;
9510 tmp--;
9511 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9512 if (tmp >= ctxt->input->base)
9513 *lastgt = tmp;
9514 else
9515 *lastgt = NULL;
9516 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009517 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009518 } else {
9519 *lastlt = NULL;
9520 *lastgt = NULL;
9521 }
9522}
9523/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009524 * xmlCheckCdataPush:
9525 * @cur: pointer to the bock of characters
9526 * @len: length of the block in bytes
9527 *
9528 * Check that the block of characters is okay as SCdata content [20]
9529 *
9530 * Returns the number of bytes to pass if okay, a negative index where an
9531 * UTF-8 error occured otherwise
9532 */
9533static int
9534xmlCheckCdataPush(const xmlChar *utf, int len) {
9535 int ix;
9536 unsigned char c;
9537 int codepoint;
9538
9539 if ((utf == NULL) || (len <= 0))
9540 return(0);
9541
9542 for (ix = 0; ix < len;) { /* string is 0-terminated */
9543 c = utf[ix];
9544 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9545 if (c >= 0x20)
9546 ix++;
9547 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9548 ix++;
9549 else
9550 return(-ix);
9551 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9552 if (ix + 2 > len) return(ix);
9553 if ((utf[ix+1] & 0xc0 ) != 0x80)
9554 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009555 codepoint = (utf[ix] & 0x1f) << 6;
9556 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009557 if (!xmlIsCharQ(codepoint))
9558 return(-ix);
9559 ix += 2;
9560 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9561 if (ix + 3 > len) return(ix);
9562 if (((utf[ix+1] & 0xc0) != 0x80) ||
9563 ((utf[ix+2] & 0xc0) != 0x80))
9564 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009565 codepoint = (utf[ix] & 0xf) << 12;
9566 codepoint |= (utf[ix+1] & 0x3f) << 6;
9567 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009568 if (!xmlIsCharQ(codepoint))
9569 return(-ix);
9570 ix += 3;
9571 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9572 if (ix + 4 > len) return(ix);
9573 if (((utf[ix+1] & 0xc0) != 0x80) ||
9574 ((utf[ix+2] & 0xc0) != 0x80) ||
9575 ((utf[ix+3] & 0xc0) != 0x80))
9576 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009577 codepoint = (utf[ix] & 0x7) << 18;
9578 codepoint |= (utf[ix+1] & 0x3f) << 12;
9579 codepoint |= (utf[ix+2] & 0x3f) << 6;
9580 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009581 if (!xmlIsCharQ(codepoint))
9582 return(-ix);
9583 ix += 4;
9584 } else /* unknown encoding */
9585 return(-ix);
9586 }
9587 return(ix);
9588}
9589
9590/**
Owen Taylor3473f882001-02-23 17:55:21 +00009591 * xmlParseTryOrFinish:
9592 * @ctxt: an XML parser context
9593 * @terminate: last chunk indicator
9594 *
9595 * Try to progress on parsing
9596 *
9597 * Returns zero if no parsing was possible
9598 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009599static int
Owen Taylor3473f882001-02-23 17:55:21 +00009600xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9601 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009602 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009603 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009604 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009605
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009606 if (ctxt->input == NULL)
9607 return(0);
9608
Owen Taylor3473f882001-02-23 17:55:21 +00009609#ifdef DEBUG_PUSH
9610 switch (ctxt->instate) {
9611 case XML_PARSER_EOF:
9612 xmlGenericError(xmlGenericErrorContext,
9613 "PP: try EOF\n"); break;
9614 case XML_PARSER_START:
9615 xmlGenericError(xmlGenericErrorContext,
9616 "PP: try START\n"); break;
9617 case XML_PARSER_MISC:
9618 xmlGenericError(xmlGenericErrorContext,
9619 "PP: try MISC\n");break;
9620 case XML_PARSER_COMMENT:
9621 xmlGenericError(xmlGenericErrorContext,
9622 "PP: try COMMENT\n");break;
9623 case XML_PARSER_PROLOG:
9624 xmlGenericError(xmlGenericErrorContext,
9625 "PP: try PROLOG\n");break;
9626 case XML_PARSER_START_TAG:
9627 xmlGenericError(xmlGenericErrorContext,
9628 "PP: try START_TAG\n");break;
9629 case XML_PARSER_CONTENT:
9630 xmlGenericError(xmlGenericErrorContext,
9631 "PP: try CONTENT\n");break;
9632 case XML_PARSER_CDATA_SECTION:
9633 xmlGenericError(xmlGenericErrorContext,
9634 "PP: try CDATA_SECTION\n");break;
9635 case XML_PARSER_END_TAG:
9636 xmlGenericError(xmlGenericErrorContext,
9637 "PP: try END_TAG\n");break;
9638 case XML_PARSER_ENTITY_DECL:
9639 xmlGenericError(xmlGenericErrorContext,
9640 "PP: try ENTITY_DECL\n");break;
9641 case XML_PARSER_ENTITY_VALUE:
9642 xmlGenericError(xmlGenericErrorContext,
9643 "PP: try ENTITY_VALUE\n");break;
9644 case XML_PARSER_ATTRIBUTE_VALUE:
9645 xmlGenericError(xmlGenericErrorContext,
9646 "PP: try ATTRIBUTE_VALUE\n");break;
9647 case XML_PARSER_DTD:
9648 xmlGenericError(xmlGenericErrorContext,
9649 "PP: try DTD\n");break;
9650 case XML_PARSER_EPILOG:
9651 xmlGenericError(xmlGenericErrorContext,
9652 "PP: try EPILOG\n");break;
9653 case XML_PARSER_PI:
9654 xmlGenericError(xmlGenericErrorContext,
9655 "PP: try PI\n");break;
9656 case XML_PARSER_IGNORE:
9657 xmlGenericError(xmlGenericErrorContext,
9658 "PP: try IGNORE\n");break;
9659 }
9660#endif
9661
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009662 if ((ctxt->input != NULL) &&
9663 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009664 xmlSHRINK(ctxt);
9665 ctxt->checkIndex = 0;
9666 }
9667 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009668
Daniel Veillarda880b122003-04-21 21:36:41 +00009669 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009670 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009671 return(0);
9672
9673
Owen Taylor3473f882001-02-23 17:55:21 +00009674 /*
9675 * Pop-up of finished entities.
9676 */
9677 while ((RAW == 0) && (ctxt->inputNr > 1))
9678 xmlPopInput(ctxt);
9679
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009680 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009681 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009682 avail = ctxt->input->length -
9683 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009684 else {
9685 /*
9686 * If we are operating on converted input, try to flush
9687 * remainng chars to avoid them stalling in the non-converted
9688 * buffer.
9689 */
9690 if ((ctxt->input->buf->raw != NULL) &&
9691 (ctxt->input->buf->raw->use > 0)) {
9692 int base = ctxt->input->base -
9693 ctxt->input->buf->buffer->content;
9694 int current = ctxt->input->cur - ctxt->input->base;
9695
9696 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9697 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9698 ctxt->input->cur = ctxt->input->base + current;
9699 ctxt->input->end =
9700 &ctxt->input->buf->buffer->content[
9701 ctxt->input->buf->buffer->use];
9702 }
9703 avail = ctxt->input->buf->buffer->use -
9704 (ctxt->input->cur - ctxt->input->base);
9705 }
Owen Taylor3473f882001-02-23 17:55:21 +00009706 if (avail < 1)
9707 goto done;
9708 switch (ctxt->instate) {
9709 case XML_PARSER_EOF:
9710 /*
9711 * Document parsing is done !
9712 */
9713 goto done;
9714 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009715 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9716 xmlChar start[4];
9717 xmlCharEncoding enc;
9718
9719 /*
9720 * Very first chars read from the document flow.
9721 */
9722 if (avail < 4)
9723 goto done;
9724
9725 /*
9726 * Get the 4 first bytes and decode the charset
9727 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009728 * plug some encoding conversion routines,
9729 * else xmlSwitchEncoding will set to (default)
9730 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009731 */
9732 start[0] = RAW;
9733 start[1] = NXT(1);
9734 start[2] = NXT(2);
9735 start[3] = NXT(3);
9736 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009737 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009738 break;
9739 }
Owen Taylor3473f882001-02-23 17:55:21 +00009740
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009741 if (avail < 2)
9742 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009743 cur = ctxt->input->cur[0];
9744 next = ctxt->input->cur[1];
9745 if (cur == 0) {
9746 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9747 ctxt->sax->setDocumentLocator(ctxt->userData,
9748 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009749 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009750 ctxt->instate = XML_PARSER_EOF;
9751#ifdef DEBUG_PUSH
9752 xmlGenericError(xmlGenericErrorContext,
9753 "PP: entering EOF\n");
9754#endif
9755 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9756 ctxt->sax->endDocument(ctxt->userData);
9757 goto done;
9758 }
9759 if ((cur == '<') && (next == '?')) {
9760 /* PI or XML decl */
9761 if (avail < 5) return(ret);
9762 if ((!terminate) &&
9763 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9764 return(ret);
9765 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9766 ctxt->sax->setDocumentLocator(ctxt->userData,
9767 &xmlDefaultSAXLocator);
9768 if ((ctxt->input->cur[2] == 'x') &&
9769 (ctxt->input->cur[3] == 'm') &&
9770 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009771 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009772 ret += 5;
9773#ifdef DEBUG_PUSH
9774 xmlGenericError(xmlGenericErrorContext,
9775 "PP: Parsing XML Decl\n");
9776#endif
9777 xmlParseXMLDecl(ctxt);
9778 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9779 /*
9780 * The XML REC instructs us to stop parsing right
9781 * here
9782 */
9783 ctxt->instate = XML_PARSER_EOF;
9784 return(0);
9785 }
9786 ctxt->standalone = ctxt->input->standalone;
9787 if ((ctxt->encoding == NULL) &&
9788 (ctxt->input->encoding != NULL))
9789 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9790 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9791 (!ctxt->disableSAX))
9792 ctxt->sax->startDocument(ctxt->userData);
9793 ctxt->instate = XML_PARSER_MISC;
9794#ifdef DEBUG_PUSH
9795 xmlGenericError(xmlGenericErrorContext,
9796 "PP: entering MISC\n");
9797#endif
9798 } else {
9799 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9800 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9801 (!ctxt->disableSAX))
9802 ctxt->sax->startDocument(ctxt->userData);
9803 ctxt->instate = XML_PARSER_MISC;
9804#ifdef DEBUG_PUSH
9805 xmlGenericError(xmlGenericErrorContext,
9806 "PP: entering MISC\n");
9807#endif
9808 }
9809 } else {
9810 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9811 ctxt->sax->setDocumentLocator(ctxt->userData,
9812 &xmlDefaultSAXLocator);
9813 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009814 if (ctxt->version == NULL) {
9815 xmlErrMemory(ctxt, NULL);
9816 break;
9817 }
Owen Taylor3473f882001-02-23 17:55:21 +00009818 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9819 (!ctxt->disableSAX))
9820 ctxt->sax->startDocument(ctxt->userData);
9821 ctxt->instate = XML_PARSER_MISC;
9822#ifdef DEBUG_PUSH
9823 xmlGenericError(xmlGenericErrorContext,
9824 "PP: entering MISC\n");
9825#endif
9826 }
9827 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009828 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009829 const xmlChar *name;
9830 const xmlChar *prefix;
9831 const xmlChar *URI;
9832 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009833
9834 if ((avail < 2) && (ctxt->inputNr == 1))
9835 goto done;
9836 cur = ctxt->input->cur[0];
9837 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009838 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009839 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9841 ctxt->sax->endDocument(ctxt->userData);
9842 goto done;
9843 }
9844 if (!terminate) {
9845 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009846 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009847 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009848 goto done;
9849 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9850 goto done;
9851 }
9852 }
9853 if (ctxt->spaceNr == 0)
9854 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009855 else if (*ctxt->space == -2)
9856 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +00009857 else
9858 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009859#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009860 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009861#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009862 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009863#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009864 else
9865 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009866#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009867 if (name == NULL) {
9868 spacePop(ctxt);
9869 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009870 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9871 ctxt->sax->endDocument(ctxt->userData);
9872 goto done;
9873 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009874#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009875 /*
9876 * [ VC: Root Element Type ]
9877 * The Name in the document type declaration must match
9878 * the element type of the root element.
9879 */
9880 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9881 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9882 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009883#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009884
9885 /*
9886 * Check for an Empty Element.
9887 */
9888 if ((RAW == '/') && (NXT(1) == '>')) {
9889 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009890
9891 if (ctxt->sax2) {
9892 if ((ctxt->sax != NULL) &&
9893 (ctxt->sax->endElementNs != NULL) &&
9894 (!ctxt->disableSAX))
9895 ctxt->sax->endElementNs(ctxt->userData, name,
9896 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009897 if (ctxt->nsNr - nsNr > 0)
9898 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009899#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009900 } else {
9901 if ((ctxt->sax != NULL) &&
9902 (ctxt->sax->endElement != NULL) &&
9903 (!ctxt->disableSAX))
9904 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009905#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009906 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009907 spacePop(ctxt);
9908 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009909 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009910 } else {
9911 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009912 }
9913 break;
9914 }
9915 if (RAW == '>') {
9916 NEXT;
9917 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009918 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009919 "Couldn't find end of Start Tag %s\n",
9920 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009921 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009922 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009923 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009924 if (ctxt->sax2)
9925 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009926#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009927 else
9928 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009929#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009930
Daniel Veillarda880b122003-04-21 21:36:41 +00009931 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009932 break;
9933 }
9934 case XML_PARSER_CONTENT: {
9935 const xmlChar *test;
9936 unsigned int cons;
9937 if ((avail < 2) && (ctxt->inputNr == 1))
9938 goto done;
9939 cur = ctxt->input->cur[0];
9940 next = ctxt->input->cur[1];
9941
9942 test = CUR_PTR;
9943 cons = ctxt->input->consumed;
9944 if ((cur == '<') && (next == '/')) {
9945 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009946 break;
9947 } else if ((cur == '<') && (next == '?')) {
9948 if ((!terminate) &&
9949 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9950 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009951 xmlParsePI(ctxt);
9952 } else if ((cur == '<') && (next != '!')) {
9953 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009954 break;
9955 } else if ((cur == '<') && (next == '!') &&
9956 (ctxt->input->cur[2] == '-') &&
9957 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +00009958 int term;
9959
9960 if (avail < 4)
9961 goto done;
9962 ctxt->input->cur += 4;
9963 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
9964 ctxt->input->cur -= 4;
9965 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +00009966 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009967 xmlParseComment(ctxt);
9968 ctxt->instate = XML_PARSER_CONTENT;
9969 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9970 (ctxt->input->cur[2] == '[') &&
9971 (ctxt->input->cur[3] == 'C') &&
9972 (ctxt->input->cur[4] == 'D') &&
9973 (ctxt->input->cur[5] == 'A') &&
9974 (ctxt->input->cur[6] == 'T') &&
9975 (ctxt->input->cur[7] == 'A') &&
9976 (ctxt->input->cur[8] == '[')) {
9977 SKIP(9);
9978 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009979 break;
9980 } else if ((cur == '<') && (next == '!') &&
9981 (avail < 9)) {
9982 goto done;
9983 } else if (cur == '&') {
9984 if ((!terminate) &&
9985 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9986 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009987 xmlParseReference(ctxt);
9988 } else {
9989 /* TODO Avoid the extra copy, handle directly !!! */
9990 /*
9991 * Goal of the following test is:
9992 * - minimize calls to the SAX 'character' callback
9993 * when they are mergeable
9994 * - handle an problem for isBlank when we only parse
9995 * a sequence of blank chars and the next one is
9996 * not available to check against '<' presence.
9997 * - tries to homogenize the differences in SAX
9998 * callbacks between the push and pull versions
9999 * of the parser.
10000 */
10001 if ((ctxt->inputNr == 1) &&
10002 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10003 if (!terminate) {
10004 if (ctxt->progressive) {
10005 if ((lastlt == NULL) ||
10006 (ctxt->input->cur > lastlt))
10007 goto done;
10008 } else if (xmlParseLookupSequence(ctxt,
10009 '<', 0, 0) < 0) {
10010 goto done;
10011 }
10012 }
10013 }
10014 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010015 xmlParseCharData(ctxt, 0);
10016 }
10017 /*
10018 * Pop-up of finished entities.
10019 */
10020 while ((RAW == 0) && (ctxt->inputNr > 1))
10021 xmlPopInput(ctxt);
10022 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010023 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10024 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010025 ctxt->instate = XML_PARSER_EOF;
10026 break;
10027 }
10028 break;
10029 }
10030 case XML_PARSER_END_TAG:
10031 if (avail < 2)
10032 goto done;
10033 if (!terminate) {
10034 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010035 /* > can be found unescaped in attribute values */
10036 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010037 goto done;
10038 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10039 goto done;
10040 }
10041 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010042 if (ctxt->sax2) {
10043 xmlParseEndTag2(ctxt,
10044 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10045 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010046 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010047 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010048 }
10049#ifdef LIBXML_SAX1_ENABLED
10050 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010051 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010052#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010053 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010054 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010055 } else {
10056 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010057 }
10058 break;
10059 case XML_PARSER_CDATA_SECTION: {
10060 /*
10061 * The Push mode need to have the SAX callback for
10062 * cdataBlock merge back contiguous callbacks.
10063 */
10064 int base;
10065
10066 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10067 if (base < 0) {
10068 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010069 int tmp;
10070
10071 tmp = xmlCheckCdataPush(ctxt->input->cur,
10072 XML_PARSER_BIG_BUFFER_SIZE);
10073 if (tmp < 0) {
10074 tmp = -tmp;
10075 ctxt->input->cur += tmp;
10076 goto encoding_error;
10077 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010078 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10079 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010080 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010081 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010082 else if (ctxt->sax->characters != NULL)
10083 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010084 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010085 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010086 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010087 ctxt->checkIndex = 0;
10088 }
10089 goto done;
10090 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010091 int tmp;
10092
10093 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10094 if ((tmp < 0) || (tmp != base)) {
10095 tmp = -tmp;
10096 ctxt->input->cur += tmp;
10097 goto encoding_error;
10098 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010099 if ((ctxt->sax != NULL) && (base > 0) &&
10100 (!ctxt->disableSAX)) {
10101 if (ctxt->sax->cdataBlock != NULL)
10102 ctxt->sax->cdataBlock(ctxt->userData,
10103 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010104 else if (ctxt->sax->characters != NULL)
10105 ctxt->sax->characters(ctxt->userData,
10106 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010107 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010108 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010109 ctxt->checkIndex = 0;
10110 ctxt->instate = XML_PARSER_CONTENT;
10111#ifdef DEBUG_PUSH
10112 xmlGenericError(xmlGenericErrorContext,
10113 "PP: entering CONTENT\n");
10114#endif
10115 }
10116 break;
10117 }
Owen Taylor3473f882001-02-23 17:55:21 +000010118 case XML_PARSER_MISC:
10119 SKIP_BLANKS;
10120 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010121 avail = ctxt->input->length -
10122 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010123 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010124 avail = ctxt->input->buf->buffer->use -
10125 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010126 if (avail < 2)
10127 goto done;
10128 cur = ctxt->input->cur[0];
10129 next = ctxt->input->cur[1];
10130 if ((cur == '<') && (next == '?')) {
10131 if ((!terminate) &&
10132 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10133 goto done;
10134#ifdef DEBUG_PUSH
10135 xmlGenericError(xmlGenericErrorContext,
10136 "PP: Parsing PI\n");
10137#endif
10138 xmlParsePI(ctxt);
10139 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010140 (ctxt->input->cur[2] == '-') &&
10141 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010142 if ((!terminate) &&
10143 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10144 goto done;
10145#ifdef DEBUG_PUSH
10146 xmlGenericError(xmlGenericErrorContext,
10147 "PP: Parsing Comment\n");
10148#endif
10149 xmlParseComment(ctxt);
10150 ctxt->instate = XML_PARSER_MISC;
10151 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010152 (ctxt->input->cur[2] == 'D') &&
10153 (ctxt->input->cur[3] == 'O') &&
10154 (ctxt->input->cur[4] == 'C') &&
10155 (ctxt->input->cur[5] == 'T') &&
10156 (ctxt->input->cur[6] == 'Y') &&
10157 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010158 (ctxt->input->cur[8] == 'E')) {
10159 if ((!terminate) &&
10160 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10161 goto done;
10162#ifdef DEBUG_PUSH
10163 xmlGenericError(xmlGenericErrorContext,
10164 "PP: Parsing internal subset\n");
10165#endif
10166 ctxt->inSubset = 1;
10167 xmlParseDocTypeDecl(ctxt);
10168 if (RAW == '[') {
10169 ctxt->instate = XML_PARSER_DTD;
10170#ifdef DEBUG_PUSH
10171 xmlGenericError(xmlGenericErrorContext,
10172 "PP: entering DTD\n");
10173#endif
10174 } else {
10175 /*
10176 * Create and update the external subset.
10177 */
10178 ctxt->inSubset = 2;
10179 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10180 (ctxt->sax->externalSubset != NULL))
10181 ctxt->sax->externalSubset(ctxt->userData,
10182 ctxt->intSubName, ctxt->extSubSystem,
10183 ctxt->extSubURI);
10184 ctxt->inSubset = 0;
10185 ctxt->instate = XML_PARSER_PROLOG;
10186#ifdef DEBUG_PUSH
10187 xmlGenericError(xmlGenericErrorContext,
10188 "PP: entering PROLOG\n");
10189#endif
10190 }
10191 } else if ((cur == '<') && (next == '!') &&
10192 (avail < 9)) {
10193 goto done;
10194 } else {
10195 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010196 ctxt->progressive = 1;
10197 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010198#ifdef DEBUG_PUSH
10199 xmlGenericError(xmlGenericErrorContext,
10200 "PP: entering START_TAG\n");
10201#endif
10202 }
10203 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010204 case XML_PARSER_PROLOG:
10205 SKIP_BLANKS;
10206 if (ctxt->input->buf == NULL)
10207 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10208 else
10209 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10210 if (avail < 2)
10211 goto done;
10212 cur = ctxt->input->cur[0];
10213 next = ctxt->input->cur[1];
10214 if ((cur == '<') && (next == '?')) {
10215 if ((!terminate) &&
10216 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10217 goto done;
10218#ifdef DEBUG_PUSH
10219 xmlGenericError(xmlGenericErrorContext,
10220 "PP: Parsing PI\n");
10221#endif
10222 xmlParsePI(ctxt);
10223 } else if ((cur == '<') && (next == '!') &&
10224 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10225 if ((!terminate) &&
10226 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10227 goto done;
10228#ifdef DEBUG_PUSH
10229 xmlGenericError(xmlGenericErrorContext,
10230 "PP: Parsing Comment\n");
10231#endif
10232 xmlParseComment(ctxt);
10233 ctxt->instate = XML_PARSER_PROLOG;
10234 } else if ((cur == '<') && (next == '!') &&
10235 (avail < 4)) {
10236 goto done;
10237 } else {
10238 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010239 if (ctxt->progressive == 0)
10240 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010241 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010242#ifdef DEBUG_PUSH
10243 xmlGenericError(xmlGenericErrorContext,
10244 "PP: entering START_TAG\n");
10245#endif
10246 }
10247 break;
10248 case XML_PARSER_EPILOG:
10249 SKIP_BLANKS;
10250 if (ctxt->input->buf == NULL)
10251 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10252 else
10253 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10254 if (avail < 2)
10255 goto done;
10256 cur = ctxt->input->cur[0];
10257 next = ctxt->input->cur[1];
10258 if ((cur == '<') && (next == '?')) {
10259 if ((!terminate) &&
10260 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10261 goto done;
10262#ifdef DEBUG_PUSH
10263 xmlGenericError(xmlGenericErrorContext,
10264 "PP: Parsing PI\n");
10265#endif
10266 xmlParsePI(ctxt);
10267 ctxt->instate = XML_PARSER_EPILOG;
10268 } else if ((cur == '<') && (next == '!') &&
10269 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10270 if ((!terminate) &&
10271 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10272 goto done;
10273#ifdef DEBUG_PUSH
10274 xmlGenericError(xmlGenericErrorContext,
10275 "PP: Parsing Comment\n");
10276#endif
10277 xmlParseComment(ctxt);
10278 ctxt->instate = XML_PARSER_EPILOG;
10279 } else if ((cur == '<') && (next == '!') &&
10280 (avail < 4)) {
10281 goto done;
10282 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010283 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010284 ctxt->instate = XML_PARSER_EOF;
10285#ifdef DEBUG_PUSH
10286 xmlGenericError(xmlGenericErrorContext,
10287 "PP: entering EOF\n");
10288#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010289 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010290 ctxt->sax->endDocument(ctxt->userData);
10291 goto done;
10292 }
10293 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010294 case XML_PARSER_DTD: {
10295 /*
10296 * Sorry but progressive parsing of the internal subset
10297 * is not expected to be supported. We first check that
10298 * the full content of the internal subset is available and
10299 * the parsing is launched only at that point.
10300 * Internal subset ends up with "']' S? '>'" in an unescaped
10301 * section and not in a ']]>' sequence which are conditional
10302 * sections (whoever argued to keep that crap in XML deserve
10303 * a place in hell !).
10304 */
10305 int base, i;
10306 xmlChar *buf;
10307 xmlChar quote = 0;
10308
10309 base = ctxt->input->cur - ctxt->input->base;
10310 if (base < 0) return(0);
10311 if (ctxt->checkIndex > base)
10312 base = ctxt->checkIndex;
10313 buf = ctxt->input->buf->buffer->content;
10314 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10315 base++) {
10316 if (quote != 0) {
10317 if (buf[base] == quote)
10318 quote = 0;
10319 continue;
10320 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010321 if ((quote == 0) && (buf[base] == '<')) {
10322 int found = 0;
10323 /* special handling of comments */
10324 if (((unsigned int) base + 4 <
10325 ctxt->input->buf->buffer->use) &&
10326 (buf[base + 1] == '!') &&
10327 (buf[base + 2] == '-') &&
10328 (buf[base + 3] == '-')) {
10329 for (;(unsigned int) base + 3 <
10330 ctxt->input->buf->buffer->use; base++) {
10331 if ((buf[base] == '-') &&
10332 (buf[base + 1] == '-') &&
10333 (buf[base + 2] == '>')) {
10334 found = 1;
10335 base += 2;
10336 break;
10337 }
10338 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010339 if (!found) {
10340#if 0
10341 fprintf(stderr, "unfinished comment\n");
10342#endif
10343 break; /* for */
10344 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010345 continue;
10346 }
10347 }
Owen Taylor3473f882001-02-23 17:55:21 +000010348 if (buf[base] == '"') {
10349 quote = '"';
10350 continue;
10351 }
10352 if (buf[base] == '\'') {
10353 quote = '\'';
10354 continue;
10355 }
10356 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010357#if 0
10358 fprintf(stderr, "%c%c%c%c: ", buf[base],
10359 buf[base + 1], buf[base + 2], buf[base + 3]);
10360#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010361 if ((unsigned int) base +1 >=
10362 ctxt->input->buf->buffer->use)
10363 break;
10364 if (buf[base + 1] == ']') {
10365 /* conditional crap, skip both ']' ! */
10366 base++;
10367 continue;
10368 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010369 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010370 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10371 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010372 if (buf[base + i] == '>') {
10373#if 0
10374 fprintf(stderr, "found\n");
10375#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010376 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010377 }
10378 if (!IS_BLANK_CH(buf[base + i])) {
10379#if 0
10380 fprintf(stderr, "not found\n");
10381#endif
10382 goto not_end_of_int_subset;
10383 }
Owen Taylor3473f882001-02-23 17:55:21 +000010384 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010385#if 0
10386 fprintf(stderr, "end of stream\n");
10387#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010388 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010389
Owen Taylor3473f882001-02-23 17:55:21 +000010390 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010391not_end_of_int_subset:
10392 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010393 }
10394 /*
10395 * We didn't found the end of the Internal subset
10396 */
Owen Taylor3473f882001-02-23 17:55:21 +000010397#ifdef DEBUG_PUSH
10398 if (next == 0)
10399 xmlGenericError(xmlGenericErrorContext,
10400 "PP: lookup of int subset end filed\n");
10401#endif
10402 goto done;
10403
10404found_end_int_subset:
10405 xmlParseInternalSubset(ctxt);
10406 ctxt->inSubset = 2;
10407 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10408 (ctxt->sax->externalSubset != NULL))
10409 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10410 ctxt->extSubSystem, ctxt->extSubURI);
10411 ctxt->inSubset = 0;
10412 ctxt->instate = XML_PARSER_PROLOG;
10413 ctxt->checkIndex = 0;
10414#ifdef DEBUG_PUSH
10415 xmlGenericError(xmlGenericErrorContext,
10416 "PP: entering PROLOG\n");
10417#endif
10418 break;
10419 }
10420 case XML_PARSER_COMMENT:
10421 xmlGenericError(xmlGenericErrorContext,
10422 "PP: internal error, state == COMMENT\n");
10423 ctxt->instate = XML_PARSER_CONTENT;
10424#ifdef DEBUG_PUSH
10425 xmlGenericError(xmlGenericErrorContext,
10426 "PP: entering CONTENT\n");
10427#endif
10428 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010429 case XML_PARSER_IGNORE:
10430 xmlGenericError(xmlGenericErrorContext,
10431 "PP: internal error, state == IGNORE");
10432 ctxt->instate = XML_PARSER_DTD;
10433#ifdef DEBUG_PUSH
10434 xmlGenericError(xmlGenericErrorContext,
10435 "PP: entering DTD\n");
10436#endif
10437 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010438 case XML_PARSER_PI:
10439 xmlGenericError(xmlGenericErrorContext,
10440 "PP: internal error, state == PI\n");
10441 ctxt->instate = XML_PARSER_CONTENT;
10442#ifdef DEBUG_PUSH
10443 xmlGenericError(xmlGenericErrorContext,
10444 "PP: entering CONTENT\n");
10445#endif
10446 break;
10447 case XML_PARSER_ENTITY_DECL:
10448 xmlGenericError(xmlGenericErrorContext,
10449 "PP: internal error, state == ENTITY_DECL\n");
10450 ctxt->instate = XML_PARSER_DTD;
10451#ifdef DEBUG_PUSH
10452 xmlGenericError(xmlGenericErrorContext,
10453 "PP: entering DTD\n");
10454#endif
10455 break;
10456 case XML_PARSER_ENTITY_VALUE:
10457 xmlGenericError(xmlGenericErrorContext,
10458 "PP: internal error, state == ENTITY_VALUE\n");
10459 ctxt->instate = XML_PARSER_CONTENT;
10460#ifdef DEBUG_PUSH
10461 xmlGenericError(xmlGenericErrorContext,
10462 "PP: entering DTD\n");
10463#endif
10464 break;
10465 case XML_PARSER_ATTRIBUTE_VALUE:
10466 xmlGenericError(xmlGenericErrorContext,
10467 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10468 ctxt->instate = XML_PARSER_START_TAG;
10469#ifdef DEBUG_PUSH
10470 xmlGenericError(xmlGenericErrorContext,
10471 "PP: entering START_TAG\n");
10472#endif
10473 break;
10474 case XML_PARSER_SYSTEM_LITERAL:
10475 xmlGenericError(xmlGenericErrorContext,
10476 "PP: internal error, state == SYSTEM_LITERAL\n");
10477 ctxt->instate = XML_PARSER_START_TAG;
10478#ifdef DEBUG_PUSH
10479 xmlGenericError(xmlGenericErrorContext,
10480 "PP: entering START_TAG\n");
10481#endif
10482 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010483 case XML_PARSER_PUBLIC_LITERAL:
10484 xmlGenericError(xmlGenericErrorContext,
10485 "PP: internal error, state == PUBLIC_LITERAL\n");
10486 ctxt->instate = XML_PARSER_START_TAG;
10487#ifdef DEBUG_PUSH
10488 xmlGenericError(xmlGenericErrorContext,
10489 "PP: entering START_TAG\n");
10490#endif
10491 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010492 }
10493 }
10494done:
10495#ifdef DEBUG_PUSH
10496 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10497#endif
10498 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010499encoding_error:
10500 {
10501 char buffer[150];
10502
10503 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10504 ctxt->input->cur[0], ctxt->input->cur[1],
10505 ctxt->input->cur[2], ctxt->input->cur[3]);
10506 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10507 "Input is not proper UTF-8, indicate encoding !\n%s",
10508 BAD_CAST buffer, NULL);
10509 }
10510 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010511}
10512
10513/**
Owen Taylor3473f882001-02-23 17:55:21 +000010514 * xmlParseChunk:
10515 * @ctxt: an XML parser context
10516 * @chunk: an char array
10517 * @size: the size in byte of the chunk
10518 * @terminate: last chunk indicator
10519 *
10520 * Parse a Chunk of memory
10521 *
10522 * Returns zero if no error, the xmlParserErrors otherwise.
10523 */
10524int
10525xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10526 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000010527 int end_in_lf = 0;
10528
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010529 if (ctxt == NULL)
10530 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010531 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010532 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010533 if (ctxt->instate == XML_PARSER_START)
10534 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000010535 if ((size > 0) && (chunk != NULL) && (!terminate) &&
10536 (chunk[size - 1] == '\r')) {
10537 end_in_lf = 1;
10538 size--;
10539 }
Owen Taylor3473f882001-02-23 17:55:21 +000010540 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10541 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10542 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10543 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010544 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010545
William M. Bracka3215c72004-07-31 16:24:01 +000010546 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10547 if (res < 0) {
10548 ctxt->errNo = XML_PARSER_EOF;
10549 ctxt->disableSAX = 1;
10550 return (XML_PARSER_EOF);
10551 }
Owen Taylor3473f882001-02-23 17:55:21 +000010552 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10553 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010554 ctxt->input->end =
10555 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010556#ifdef DEBUG_PUSH
10557 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10558#endif
10559
Owen Taylor3473f882001-02-23 17:55:21 +000010560 } else if (ctxt->instate != XML_PARSER_EOF) {
10561 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10562 xmlParserInputBufferPtr in = ctxt->input->buf;
10563 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10564 (in->raw != NULL)) {
10565 int nbchars;
10566
10567 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10568 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010569 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010570 xmlGenericError(xmlGenericErrorContext,
10571 "xmlParseChunk: encoder error\n");
10572 return(XML_ERR_INVALID_ENCODING);
10573 }
10574 }
10575 }
10576 }
10577 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000010578 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
10579 (ctxt->input->buf != NULL)) {
10580 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
10581 }
Daniel Veillard14412512005-01-21 23:53:26 +000010582 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010583 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010584 if (terminate) {
10585 /*
10586 * Check for termination
10587 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010588 int avail = 0;
10589
10590 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010591 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010592 avail = ctxt->input->length -
10593 (ctxt->input->cur - ctxt->input->base);
10594 else
10595 avail = ctxt->input->buf->buffer->use -
10596 (ctxt->input->cur - ctxt->input->base);
10597 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010598
Owen Taylor3473f882001-02-23 17:55:21 +000010599 if ((ctxt->instate != XML_PARSER_EOF) &&
10600 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010601 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010602 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010603 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010604 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010605 }
Owen Taylor3473f882001-02-23 17:55:21 +000010606 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010607 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010608 ctxt->sax->endDocument(ctxt->userData);
10609 }
10610 ctxt->instate = XML_PARSER_EOF;
10611 }
10612 return((xmlParserErrors) ctxt->errNo);
10613}
10614
10615/************************************************************************
10616 * *
10617 * I/O front end functions to the parser *
10618 * *
10619 ************************************************************************/
10620
10621/**
Owen Taylor3473f882001-02-23 17:55:21 +000010622 * xmlCreatePushParserCtxt:
10623 * @sax: a SAX handler
10624 * @user_data: The user data returned on SAX callbacks
10625 * @chunk: a pointer to an array of chars
10626 * @size: number of chars in the array
10627 * @filename: an optional file name or URI
10628 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010629 * Create a parser context for using the XML parser in push mode.
10630 * If @buffer and @size are non-NULL, the data is used to detect
10631 * the encoding. The remaining characters will be parsed so they
10632 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010633 * To allow content encoding detection, @size should be >= 4
10634 * The value of @filename is used for fetching external entities
10635 * and error/warning reports.
10636 *
10637 * Returns the new parser context or NULL
10638 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010639
Owen Taylor3473f882001-02-23 17:55:21 +000010640xmlParserCtxtPtr
10641xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10642 const char *chunk, int size, const char *filename) {
10643 xmlParserCtxtPtr ctxt;
10644 xmlParserInputPtr inputStream;
10645 xmlParserInputBufferPtr buf;
10646 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10647
10648 /*
10649 * plug some encoding conversion routines
10650 */
10651 if ((chunk != NULL) && (size >= 4))
10652 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10653
10654 buf = xmlAllocParserInputBuffer(enc);
10655 if (buf == NULL) return(NULL);
10656
10657 ctxt = xmlNewParserCtxt();
10658 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010659 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010660 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010661 return(NULL);
10662 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010663 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010664 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10665 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010666 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010667 xmlFreeParserInputBuffer(buf);
10668 xmlFreeParserCtxt(ctxt);
10669 return(NULL);
10670 }
Owen Taylor3473f882001-02-23 17:55:21 +000010671 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010672#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010673 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010674#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010675 xmlFree(ctxt->sax);
10676 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10677 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010678 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010679 xmlFreeParserInputBuffer(buf);
10680 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010681 return(NULL);
10682 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010683 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10684 if (sax->initialized == XML_SAX2_MAGIC)
10685 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10686 else
10687 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010688 if (user_data != NULL)
10689 ctxt->userData = user_data;
10690 }
10691 if (filename == NULL) {
10692 ctxt->directory = NULL;
10693 } else {
10694 ctxt->directory = xmlParserGetDirectory(filename);
10695 }
10696
10697 inputStream = xmlNewInputStream(ctxt);
10698 if (inputStream == NULL) {
10699 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010700 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010701 return(NULL);
10702 }
10703
10704 if (filename == NULL)
10705 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010706 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010707 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010708 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010709 if (inputStream->filename == NULL) {
10710 xmlFreeParserCtxt(ctxt);
10711 xmlFreeParserInputBuffer(buf);
10712 return(NULL);
10713 }
10714 }
Owen Taylor3473f882001-02-23 17:55:21 +000010715 inputStream->buf = buf;
10716 inputStream->base = inputStream->buf->buffer->content;
10717 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010718 inputStream->end =
10719 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010720
10721 inputPush(ctxt, inputStream);
10722
William M. Brack3a1cd212005-02-11 14:35:54 +000010723 /*
10724 * If the caller didn't provide an initial 'chunk' for determining
10725 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10726 * that it can be automatically determined later
10727 */
10728 if ((size == 0) || (chunk == NULL)) {
10729 ctxt->charset = XML_CHAR_ENCODING_NONE;
10730 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010731 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10732 int cur = ctxt->input->cur - ctxt->input->base;
10733
Owen Taylor3473f882001-02-23 17:55:21 +000010734 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010735
10736 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10737 ctxt->input->cur = ctxt->input->base + cur;
10738 ctxt->input->end =
10739 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010740#ifdef DEBUG_PUSH
10741 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10742#endif
10743 }
10744
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010745 if (enc != XML_CHAR_ENCODING_NONE) {
10746 xmlSwitchEncoding(ctxt, enc);
10747 }
10748
Owen Taylor3473f882001-02-23 17:55:21 +000010749 return(ctxt);
10750}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010751#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010752
10753/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010754 * xmlStopParser:
10755 * @ctxt: an XML parser context
10756 *
10757 * Blocks further parser processing
10758 */
10759void
10760xmlStopParser(xmlParserCtxtPtr ctxt) {
10761 if (ctxt == NULL)
10762 return;
10763 ctxt->instate = XML_PARSER_EOF;
10764 ctxt->disableSAX = 1;
10765 if (ctxt->input != NULL) {
10766 ctxt->input->cur = BAD_CAST"";
10767 ctxt->input->base = ctxt->input->cur;
10768 }
10769}
10770
10771/**
Owen Taylor3473f882001-02-23 17:55:21 +000010772 * xmlCreateIOParserCtxt:
10773 * @sax: a SAX handler
10774 * @user_data: The user data returned on SAX callbacks
10775 * @ioread: an I/O read function
10776 * @ioclose: an I/O close function
10777 * @ioctx: an I/O handler
10778 * @enc: the charset encoding if known
10779 *
10780 * Create a parser context for using the XML parser with an existing
10781 * I/O stream
10782 *
10783 * Returns the new parser context or NULL
10784 */
10785xmlParserCtxtPtr
10786xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10787 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10788 void *ioctx, xmlCharEncoding enc) {
10789 xmlParserCtxtPtr ctxt;
10790 xmlParserInputPtr inputStream;
10791 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010792
10793 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010794
10795 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10796 if (buf == NULL) return(NULL);
10797
10798 ctxt = xmlNewParserCtxt();
10799 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010800 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010801 return(NULL);
10802 }
10803 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010804#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010805 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010806#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010807 xmlFree(ctxt->sax);
10808 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10809 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010810 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010811 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010812 return(NULL);
10813 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010814 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10815 if (sax->initialized == XML_SAX2_MAGIC)
10816 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10817 else
10818 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010819 if (user_data != NULL)
10820 ctxt->userData = user_data;
10821 }
10822
10823 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10824 if (inputStream == NULL) {
10825 xmlFreeParserCtxt(ctxt);
10826 return(NULL);
10827 }
10828 inputPush(ctxt, inputStream);
10829
10830 return(ctxt);
10831}
10832
Daniel Veillard4432df22003-09-28 18:58:27 +000010833#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010834/************************************************************************
10835 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010836 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010837 * *
10838 ************************************************************************/
10839
10840/**
10841 * xmlIOParseDTD:
10842 * @sax: the SAX handler block or NULL
10843 * @input: an Input Buffer
10844 * @enc: the charset encoding if known
10845 *
10846 * Load and parse a DTD
10847 *
10848 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000010849 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000010850 */
10851
10852xmlDtdPtr
10853xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10854 xmlCharEncoding enc) {
10855 xmlDtdPtr ret = NULL;
10856 xmlParserCtxtPtr ctxt;
10857 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010858 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010859
10860 if (input == NULL)
10861 return(NULL);
10862
10863 ctxt = xmlNewParserCtxt();
10864 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000010865 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010866 return(NULL);
10867 }
10868
10869 /*
10870 * Set-up the SAX context
10871 */
10872 if (sax != NULL) {
10873 if (ctxt->sax != NULL)
10874 xmlFree(ctxt->sax);
10875 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010876 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010877 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010878 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010879
10880 /*
10881 * generate a parser input from the I/O handler
10882 */
10883
Daniel Veillard43caefb2003-12-07 19:32:22 +000010884 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010885 if (pinput == NULL) {
10886 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000010887 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000010888 xmlFreeParserCtxt(ctxt);
10889 return(NULL);
10890 }
10891
10892 /*
10893 * plug some encoding conversion routines here.
10894 */
10895 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010896 if (enc != XML_CHAR_ENCODING_NONE) {
10897 xmlSwitchEncoding(ctxt, enc);
10898 }
Owen Taylor3473f882001-02-23 17:55:21 +000010899
10900 pinput->filename = NULL;
10901 pinput->line = 1;
10902 pinput->col = 1;
10903 pinput->base = ctxt->input->cur;
10904 pinput->cur = ctxt->input->cur;
10905 pinput->free = NULL;
10906
10907 /*
10908 * let's parse that entity knowing it's an external subset.
10909 */
10910 ctxt->inSubset = 2;
10911 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10912 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10913 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010914
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010915 if ((enc == XML_CHAR_ENCODING_NONE) &&
10916 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010917 /*
10918 * Get the 4 first bytes and decode the charset
10919 * if enc != XML_CHAR_ENCODING_NONE
10920 * plug some encoding conversion routines.
10921 */
10922 start[0] = RAW;
10923 start[1] = NXT(1);
10924 start[2] = NXT(2);
10925 start[3] = NXT(3);
10926 enc = xmlDetectCharEncoding(start, 4);
10927 if (enc != XML_CHAR_ENCODING_NONE) {
10928 xmlSwitchEncoding(ctxt, enc);
10929 }
10930 }
10931
Owen Taylor3473f882001-02-23 17:55:21 +000010932 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10933
10934 if (ctxt->myDoc != NULL) {
10935 if (ctxt->wellFormed) {
10936 ret = ctxt->myDoc->extSubset;
10937 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010938 if (ret != NULL) {
10939 xmlNodePtr tmp;
10940
10941 ret->doc = NULL;
10942 tmp = ret->children;
10943 while (tmp != NULL) {
10944 tmp->doc = NULL;
10945 tmp = tmp->next;
10946 }
10947 }
Owen Taylor3473f882001-02-23 17:55:21 +000010948 } else {
10949 ret = NULL;
10950 }
10951 xmlFreeDoc(ctxt->myDoc);
10952 ctxt->myDoc = NULL;
10953 }
10954 if (sax != NULL) ctxt->sax = NULL;
10955 xmlFreeParserCtxt(ctxt);
10956
10957 return(ret);
10958}
10959
10960/**
10961 * xmlSAXParseDTD:
10962 * @sax: the SAX handler block
10963 * @ExternalID: a NAME* containing the External ID of the DTD
10964 * @SystemID: a NAME* containing the URL to the DTD
10965 *
10966 * Load and parse an external subset.
10967 *
10968 * Returns the resulting xmlDtdPtr or NULL in case of error.
10969 */
10970
10971xmlDtdPtr
10972xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10973 const xmlChar *SystemID) {
10974 xmlDtdPtr ret = NULL;
10975 xmlParserCtxtPtr ctxt;
10976 xmlParserInputPtr input = NULL;
10977 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010978 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010979
10980 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10981
10982 ctxt = xmlNewParserCtxt();
10983 if (ctxt == NULL) {
10984 return(NULL);
10985 }
10986
10987 /*
10988 * Set-up the SAX context
10989 */
10990 if (sax != NULL) {
10991 if (ctxt->sax != NULL)
10992 xmlFree(ctxt->sax);
10993 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010994 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010995 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010996
10997 /*
10998 * Canonicalise the system ID
10999 */
11000 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011001 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011002 xmlFreeParserCtxt(ctxt);
11003 return(NULL);
11004 }
Owen Taylor3473f882001-02-23 17:55:21 +000011005
11006 /*
11007 * Ask the Entity resolver to load the damn thing
11008 */
11009
11010 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011011 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11012 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011013 if (input == NULL) {
11014 if (sax != NULL) ctxt->sax = NULL;
11015 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011016 if (systemIdCanonic != NULL)
11017 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011018 return(NULL);
11019 }
11020
11021 /*
11022 * plug some encoding conversion routines here.
11023 */
11024 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011025 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11026 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11027 xmlSwitchEncoding(ctxt, enc);
11028 }
Owen Taylor3473f882001-02-23 17:55:21 +000011029
11030 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011031 input->filename = (char *) systemIdCanonic;
11032 else
11033 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011034 input->line = 1;
11035 input->col = 1;
11036 input->base = ctxt->input->cur;
11037 input->cur = ctxt->input->cur;
11038 input->free = NULL;
11039
11040 /*
11041 * let's parse that entity knowing it's an external subset.
11042 */
11043 ctxt->inSubset = 2;
11044 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11045 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11046 ExternalID, SystemID);
11047 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11048
11049 if (ctxt->myDoc != NULL) {
11050 if (ctxt->wellFormed) {
11051 ret = ctxt->myDoc->extSubset;
11052 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011053 if (ret != NULL) {
11054 xmlNodePtr tmp;
11055
11056 ret->doc = NULL;
11057 tmp = ret->children;
11058 while (tmp != NULL) {
11059 tmp->doc = NULL;
11060 tmp = tmp->next;
11061 }
11062 }
Owen Taylor3473f882001-02-23 17:55:21 +000011063 } else {
11064 ret = NULL;
11065 }
11066 xmlFreeDoc(ctxt->myDoc);
11067 ctxt->myDoc = NULL;
11068 }
11069 if (sax != NULL) ctxt->sax = NULL;
11070 xmlFreeParserCtxt(ctxt);
11071
11072 return(ret);
11073}
11074
Daniel Veillard4432df22003-09-28 18:58:27 +000011075
Owen Taylor3473f882001-02-23 17:55:21 +000011076/**
11077 * xmlParseDTD:
11078 * @ExternalID: a NAME* containing the External ID of the DTD
11079 * @SystemID: a NAME* containing the URL to the DTD
11080 *
11081 * Load and parse an external subset.
11082 *
11083 * Returns the resulting xmlDtdPtr or NULL in case of error.
11084 */
11085
11086xmlDtdPtr
11087xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11088 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11089}
Daniel Veillard4432df22003-09-28 18:58:27 +000011090#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011091
11092/************************************************************************
11093 * *
11094 * Front ends when parsing an Entity *
11095 * *
11096 ************************************************************************/
11097
11098/**
Owen Taylor3473f882001-02-23 17:55:21 +000011099 * xmlParseCtxtExternalEntity:
11100 * @ctx: the existing parsing context
11101 * @URL: the URL for the entity to load
11102 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011103 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011104 *
11105 * Parse an external general entity within an existing parsing context
11106 * An external general parsed entity is well-formed if it matches the
11107 * production labeled extParsedEnt.
11108 *
11109 * [78] extParsedEnt ::= TextDecl? content
11110 *
11111 * Returns 0 if the entity is well formed, -1 in case of args problem and
11112 * the parser error code otherwise
11113 */
11114
11115int
11116xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011117 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011118 xmlParserCtxtPtr ctxt;
11119 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011120 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011121 xmlSAXHandlerPtr oldsax = NULL;
11122 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011123 xmlChar start[4];
11124 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011125 xmlParserInputPtr inputStream;
11126 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011127
Daniel Veillardce682bc2004-11-05 17:22:25 +000011128 if (ctx == NULL) return(-1);
11129
Owen Taylor3473f882001-02-23 17:55:21 +000011130 if (ctx->depth > 40) {
11131 return(XML_ERR_ENTITY_LOOP);
11132 }
11133
Daniel Veillardcda96922001-08-21 10:56:31 +000011134 if (lst != NULL)
11135 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011136 if ((URL == NULL) && (ID == NULL))
11137 return(-1);
11138 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11139 return(-1);
11140
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011141 ctxt = xmlNewParserCtxt();
11142 if (ctxt == NULL) {
11143 return(-1);
11144 }
11145
Owen Taylor3473f882001-02-23 17:55:21 +000011146 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011147 ctxt->_private = ctx->_private;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011148
11149 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11150 if (inputStream == NULL) {
11151 xmlFreeParserCtxt(ctxt);
11152 return(-1);
11153 }
11154
11155 inputPush(ctxt, inputStream);
11156
11157 if ((ctxt->directory == NULL) && (directory == NULL))
11158 directory = xmlParserGetDirectory((char *)URL);
11159 if ((ctxt->directory == NULL) && (directory != NULL))
11160 ctxt->directory = directory;
11161
Owen Taylor3473f882001-02-23 17:55:21 +000011162 oldsax = ctxt->sax;
11163 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011164 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011165 newDoc = xmlNewDoc(BAD_CAST "1.0");
11166 if (newDoc == NULL) {
11167 xmlFreeParserCtxt(ctxt);
11168 return(-1);
11169 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011170 if (ctx->myDoc->dict) {
11171 newDoc->dict = ctx->myDoc->dict;
11172 xmlDictReference(newDoc->dict);
11173 }
Owen Taylor3473f882001-02-23 17:55:21 +000011174 if (ctx->myDoc != NULL) {
11175 newDoc->intSubset = ctx->myDoc->intSubset;
11176 newDoc->extSubset = ctx->myDoc->extSubset;
11177 }
11178 if (ctx->myDoc->URL != NULL) {
11179 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11180 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011181 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11182 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011183 ctxt->sax = oldsax;
11184 xmlFreeParserCtxt(ctxt);
11185 newDoc->intSubset = NULL;
11186 newDoc->extSubset = NULL;
11187 xmlFreeDoc(newDoc);
11188 return(-1);
11189 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011190 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011191 nodePush(ctxt, newDoc->children);
11192 if (ctx->myDoc == NULL) {
11193 ctxt->myDoc = newDoc;
11194 } else {
11195 ctxt->myDoc = ctx->myDoc;
11196 newDoc->children->doc = ctx->myDoc;
11197 }
11198
Daniel Veillard87a764e2001-06-20 17:41:10 +000011199 /*
11200 * Get the 4 first bytes and decode the charset
11201 * if enc != XML_CHAR_ENCODING_NONE
11202 * plug some encoding conversion routines.
11203 */
11204 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011205 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11206 start[0] = RAW;
11207 start[1] = NXT(1);
11208 start[2] = NXT(2);
11209 start[3] = NXT(3);
11210 enc = xmlDetectCharEncoding(start, 4);
11211 if (enc != XML_CHAR_ENCODING_NONE) {
11212 xmlSwitchEncoding(ctxt, enc);
11213 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011214 }
11215
Owen Taylor3473f882001-02-23 17:55:21 +000011216 /*
11217 * Parse a possible text declaration first
11218 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011219 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011220 xmlParseTextDecl(ctxt);
11221 }
11222
11223 /*
11224 * Doing validity checking on chunk doesn't make sense
11225 */
11226 ctxt->instate = XML_PARSER_CONTENT;
11227 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011228 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011229 ctxt->loadsubset = ctx->loadsubset;
11230 ctxt->depth = ctx->depth + 1;
11231 ctxt->replaceEntities = ctx->replaceEntities;
11232 if (ctxt->validate) {
11233 ctxt->vctxt.error = ctx->vctxt.error;
11234 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011235 } else {
11236 ctxt->vctxt.error = NULL;
11237 ctxt->vctxt.warning = NULL;
11238 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011239 ctxt->vctxt.nodeTab = NULL;
11240 ctxt->vctxt.nodeNr = 0;
11241 ctxt->vctxt.nodeMax = 0;
11242 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011243 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11244 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011245 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11246 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11247 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011248 ctxt->dictNames = ctx->dictNames;
11249 ctxt->attsDefault = ctx->attsDefault;
11250 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011251 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011252
11253 xmlParseContent(ctxt);
11254
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011255 ctx->validate = ctxt->validate;
11256 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011257 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011258 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011259 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011260 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011261 }
11262 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011263 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 }
11265
11266 if (!ctxt->wellFormed) {
11267 if (ctxt->errNo == 0)
11268 ret = 1;
11269 else
11270 ret = ctxt->errNo;
11271 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011272 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011273 xmlNodePtr cur;
11274
11275 /*
11276 * Return the newly created nodeset after unlinking it from
11277 * they pseudo parent.
11278 */
11279 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011280 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011281 while (cur != NULL) {
11282 cur->parent = NULL;
11283 cur = cur->next;
11284 }
11285 newDoc->children->children = NULL;
11286 }
11287 ret = 0;
11288 }
11289 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011290 ctxt->dict = NULL;
11291 ctxt->attsDefault = NULL;
11292 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011293 xmlFreeParserCtxt(ctxt);
11294 newDoc->intSubset = NULL;
11295 newDoc->extSubset = NULL;
11296 xmlFreeDoc(newDoc);
11297
11298 return(ret);
11299}
11300
11301/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011302 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011303 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011304 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011305 * @sax: the SAX handler bloc (possibly NULL)
11306 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11307 * @depth: Used for loop detection, use 0
11308 * @URL: the URL for the entity to load
11309 * @ID: the System ID for the entity to load
11310 * @list: the return value for the set of parsed nodes
11311 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011312 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011313 *
11314 * Returns 0 if the entity is well formed, -1 in case of args problem and
11315 * the parser error code otherwise
11316 */
11317
Daniel Veillard7d515752003-09-26 19:12:37 +000011318static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011319xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11320 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011321 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011322 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011323 xmlParserCtxtPtr ctxt;
11324 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011325 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011326 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011327 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011328 xmlChar start[4];
11329 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011330
11331 if (depth > 40) {
11332 return(XML_ERR_ENTITY_LOOP);
11333 }
11334
11335
11336
11337 if (list != NULL)
11338 *list = NULL;
11339 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011340 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000011341 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000011342 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011343
11344
11345 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011346 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011347 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011348 if (oldctxt != NULL) {
11349 ctxt->_private = oldctxt->_private;
11350 ctxt->loadsubset = oldctxt->loadsubset;
11351 ctxt->validate = oldctxt->validate;
11352 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011353 ctxt->record_info = oldctxt->record_info;
11354 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11355 ctxt->node_seq.length = oldctxt->node_seq.length;
11356 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011357 } else {
11358 /*
11359 * Doing validity checking on chunk without context
11360 * doesn't make sense
11361 */
11362 ctxt->_private = NULL;
11363 ctxt->validate = 0;
11364 ctxt->external = 2;
11365 ctxt->loadsubset = 0;
11366 }
Owen Taylor3473f882001-02-23 17:55:21 +000011367 if (sax != NULL) {
11368 oldsax = ctxt->sax;
11369 ctxt->sax = sax;
11370 if (user_data != NULL)
11371 ctxt->userData = user_data;
11372 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011373 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011374 newDoc = xmlNewDoc(BAD_CAST "1.0");
11375 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011376 ctxt->node_seq.maximum = 0;
11377 ctxt->node_seq.length = 0;
11378 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011379 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011380 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011381 }
Daniel Veillard30e76072006-03-09 14:13:55 +000011382 newDoc->intSubset = doc->intSubset;
11383 newDoc->extSubset = doc->extSubset;
11384 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011385 xmlDictReference(newDoc->dict);
11386
Owen Taylor3473f882001-02-23 17:55:21 +000011387 if (doc->URL != NULL) {
11388 newDoc->URL = xmlStrdup(doc->URL);
11389 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011390 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11391 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011392 if (sax != NULL)
11393 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011394 ctxt->node_seq.maximum = 0;
11395 ctxt->node_seq.length = 0;
11396 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011397 xmlFreeParserCtxt(ctxt);
11398 newDoc->intSubset = NULL;
11399 newDoc->extSubset = NULL;
11400 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011401 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011402 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011403 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011404 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000011405 ctxt->myDoc = doc;
11406 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011407
Daniel Veillard87a764e2001-06-20 17:41:10 +000011408 /*
11409 * Get the 4 first bytes and decode the charset
11410 * if enc != XML_CHAR_ENCODING_NONE
11411 * plug some encoding conversion routines.
11412 */
11413 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011414 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11415 start[0] = RAW;
11416 start[1] = NXT(1);
11417 start[2] = NXT(2);
11418 start[3] = NXT(3);
11419 enc = xmlDetectCharEncoding(start, 4);
11420 if (enc != XML_CHAR_ENCODING_NONE) {
11421 xmlSwitchEncoding(ctxt, enc);
11422 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011423 }
11424
Owen Taylor3473f882001-02-23 17:55:21 +000011425 /*
11426 * Parse a possible text declaration first
11427 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011428 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011429 xmlParseTextDecl(ctxt);
11430 }
11431
Owen Taylor3473f882001-02-23 17:55:21 +000011432 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011433 ctxt->depth = depth;
11434
11435 xmlParseContent(ctxt);
11436
Daniel Veillard561b7f82002-03-20 21:55:57 +000011437 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011438 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011439 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011440 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011441 }
11442 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011443 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011444 }
11445
11446 if (!ctxt->wellFormed) {
11447 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011448 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011449 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011450 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011451 } else {
11452 if (list != NULL) {
11453 xmlNodePtr cur;
11454
11455 /*
11456 * Return the newly created nodeset after unlinking it from
11457 * they pseudo parent.
11458 */
11459 cur = newDoc->children->children;
11460 *list = cur;
11461 while (cur != NULL) {
11462 cur->parent = NULL;
11463 cur = cur->next;
11464 }
11465 newDoc->children->children = NULL;
11466 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011467 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011468 }
11469 if (sax != NULL)
11470 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011471 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11472 oldctxt->node_seq.length = ctxt->node_seq.length;
11473 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011474 ctxt->node_seq.maximum = 0;
11475 ctxt->node_seq.length = 0;
11476 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011477 xmlFreeParserCtxt(ctxt);
11478 newDoc->intSubset = NULL;
11479 newDoc->extSubset = NULL;
11480 xmlFreeDoc(newDoc);
11481
11482 return(ret);
11483}
11484
Daniel Veillard81273902003-09-30 00:43:48 +000011485#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011486/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011487 * xmlParseExternalEntity:
11488 * @doc: the document the chunk pertains to
11489 * @sax: the SAX handler bloc (possibly NULL)
11490 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11491 * @depth: Used for loop detection, use 0
11492 * @URL: the URL for the entity to load
11493 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011494 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011495 *
11496 * Parse an external general entity
11497 * An external general parsed entity is well-formed if it matches the
11498 * production labeled extParsedEnt.
11499 *
11500 * [78] extParsedEnt ::= TextDecl? content
11501 *
11502 * Returns 0 if the entity is well formed, -1 in case of args problem and
11503 * the parser error code otherwise
11504 */
11505
11506int
11507xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011508 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011509 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011510 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011511}
11512
11513/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011514 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011515 * @doc: the document the chunk pertains to
11516 * @sax: the SAX handler bloc (possibly NULL)
11517 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11518 * @depth: Used for loop detection, use 0
11519 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011520 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011521 *
11522 * Parse a well-balanced chunk of an XML document
11523 * called by the parser
11524 * The allowed sequence for the Well Balanced Chunk is the one defined by
11525 * the content production in the XML grammar:
11526 *
11527 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11528 *
11529 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11530 * the parser error code otherwise
11531 */
11532
11533int
11534xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011535 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011536 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11537 depth, string, lst, 0 );
11538}
Daniel Veillard81273902003-09-30 00:43:48 +000011539#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011540
11541/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011542 * xmlParseBalancedChunkMemoryInternal:
11543 * @oldctxt: the existing parsing context
11544 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11545 * @user_data: the user data field for the parser context
11546 * @lst: the return value for the set of parsed nodes
11547 *
11548 *
11549 * Parse a well-balanced chunk of an XML document
11550 * called by the parser
11551 * The allowed sequence for the Well Balanced Chunk is the one defined by
11552 * the content production in the XML grammar:
11553 *
11554 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11555 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011556 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11557 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011558 *
11559 * In case recover is set to 1, the nodelist will not be empty even if
11560 * the parsed chunk is not well balanced.
11561 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011562static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011563xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11564 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11565 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011566 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011567 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011568 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011569 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011570 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011571 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011572 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011573
11574 if (oldctxt->depth > 40) {
11575 return(XML_ERR_ENTITY_LOOP);
11576 }
11577
11578
11579 if (lst != NULL)
11580 *lst = NULL;
11581 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011582 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011583
11584 size = xmlStrlen(string);
11585
11586 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011587 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011588 if (user_data != NULL)
11589 ctxt->userData = user_data;
11590 else
11591 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011592 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11593 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011594 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11595 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11596 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011597
11598 oldsax = ctxt->sax;
11599 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011600 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011601 ctxt->replaceEntities = oldctxt->replaceEntities;
11602 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011603
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011604 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011605 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011606 newDoc = xmlNewDoc(BAD_CAST "1.0");
11607 if (newDoc == NULL) {
11608 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011609 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011610 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011611 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011612 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011613 newDoc->dict = ctxt->dict;
11614 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011615 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011616 } else {
11617 ctxt->myDoc = oldctxt->myDoc;
11618 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011619 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011620 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011621 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11622 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011623 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011624 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011625 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011626 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011627 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011628 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011629 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011630 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011631 ctxt->myDoc->children = NULL;
11632 ctxt->myDoc->last = NULL;
11633 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011634 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011635 ctxt->instate = XML_PARSER_CONTENT;
11636 ctxt->depth = oldctxt->depth + 1;
11637
Daniel Veillard328f48c2002-11-15 15:24:34 +000011638 ctxt->validate = 0;
11639 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011640 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11641 /*
11642 * ID/IDREF registration will be done in xmlValidateElement below
11643 */
11644 ctxt->loadsubset |= XML_SKIP_IDS;
11645 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011646 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011647 ctxt->attsDefault = oldctxt->attsDefault;
11648 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011649
Daniel Veillard68e9e742002-11-16 15:35:11 +000011650 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011651 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011653 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011654 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011655 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011656 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011657 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011658 }
11659
11660 if (!ctxt->wellFormed) {
11661 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011662 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011663 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011664 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011665 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011666 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011667 }
11668
William M. Brack7b9154b2003-09-27 19:23:50 +000011669 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011670 xmlNodePtr cur;
11671
11672 /*
11673 * Return the newly created nodeset after unlinking it from
11674 * they pseudo parent.
11675 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011676 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011677 *lst = cur;
11678 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011679#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011680 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11681 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11682 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011683 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11684 oldctxt->myDoc, cur);
11685 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011686#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011687 cur->parent = NULL;
11688 cur = cur->next;
11689 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011690 ctxt->myDoc->children->children = NULL;
11691 }
11692 if (ctxt->myDoc != NULL) {
11693 xmlFreeNode(ctxt->myDoc->children);
11694 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011695 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011696 }
11697
11698 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011699 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011700 ctxt->attsDefault = NULL;
11701 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011702 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011703 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011704 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011705 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011706
11707 return(ret);
11708}
11709
Daniel Veillard29b17482004-08-16 00:39:03 +000011710/**
11711 * xmlParseInNodeContext:
11712 * @node: the context node
11713 * @data: the input string
11714 * @datalen: the input string length in bytes
11715 * @options: a combination of xmlParserOption
11716 * @lst: the return value for the set of parsed nodes
11717 *
11718 * Parse a well-balanced chunk of an XML document
11719 * within the context (DTD, namespaces, etc ...) of the given node.
11720 *
11721 * The allowed sequence for the data is a Well Balanced Chunk defined by
11722 * the content production in the XML grammar:
11723 *
11724 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11725 *
11726 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11727 * error code otherwise
11728 */
11729xmlParserErrors
11730xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11731 int options, xmlNodePtr *lst) {
11732#ifdef SAX2
11733 xmlParserCtxtPtr ctxt;
11734 xmlDocPtr doc = NULL;
11735 xmlNodePtr fake, cur;
11736 int nsnr = 0;
11737
11738 xmlParserErrors ret = XML_ERR_OK;
11739
11740 /*
11741 * check all input parameters, grab the document
11742 */
11743 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11744 return(XML_ERR_INTERNAL_ERROR);
11745 switch (node->type) {
11746 case XML_ELEMENT_NODE:
11747 case XML_ATTRIBUTE_NODE:
11748 case XML_TEXT_NODE:
11749 case XML_CDATA_SECTION_NODE:
11750 case XML_ENTITY_REF_NODE:
11751 case XML_PI_NODE:
11752 case XML_COMMENT_NODE:
11753 case XML_DOCUMENT_NODE:
11754 case XML_HTML_DOCUMENT_NODE:
11755 break;
11756 default:
11757 return(XML_ERR_INTERNAL_ERROR);
11758
11759 }
11760 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11761 (node->type != XML_DOCUMENT_NODE) &&
11762 (node->type != XML_HTML_DOCUMENT_NODE))
11763 node = node->parent;
11764 if (node == NULL)
11765 return(XML_ERR_INTERNAL_ERROR);
11766 if (node->type == XML_ELEMENT_NODE)
11767 doc = node->doc;
11768 else
11769 doc = (xmlDocPtr) node;
11770 if (doc == NULL)
11771 return(XML_ERR_INTERNAL_ERROR);
11772
11773 /*
11774 * allocate a context and set-up everything not related to the
11775 * node position in the tree
11776 */
11777 if (doc->type == XML_DOCUMENT_NODE)
11778 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11779#ifdef LIBXML_HTML_ENABLED
11780 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11781 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11782#endif
11783 else
11784 return(XML_ERR_INTERNAL_ERROR);
11785
11786 if (ctxt == NULL)
11787 return(XML_ERR_NO_MEMORY);
11788 fake = xmlNewComment(NULL);
11789 if (fake == NULL) {
11790 xmlFreeParserCtxt(ctxt);
11791 return(XML_ERR_NO_MEMORY);
11792 }
11793 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011794
11795 /*
11796 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11797 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11798 * we must wait until the last moment to free the original one.
11799 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011800 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011801 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011802 xmlDictFree(ctxt->dict);
11803 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011804 } else
11805 options |= XML_PARSE_NODICT;
11806
11807 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011808 xmlDetectSAX2(ctxt);
11809 ctxt->myDoc = doc;
11810
11811 if (node->type == XML_ELEMENT_NODE) {
11812 nodePush(ctxt, node);
11813 /*
11814 * initialize the SAX2 namespaces stack
11815 */
11816 cur = node;
11817 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11818 xmlNsPtr ns = cur->nsDef;
11819 const xmlChar *iprefix, *ihref;
11820
11821 while (ns != NULL) {
11822 if (ctxt->dict) {
11823 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11824 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11825 } else {
11826 iprefix = ns->prefix;
11827 ihref = ns->href;
11828 }
11829
11830 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11831 nsPush(ctxt, iprefix, ihref);
11832 nsnr++;
11833 }
11834 ns = ns->next;
11835 }
11836 cur = cur->parent;
11837 }
11838 ctxt->instate = XML_PARSER_CONTENT;
11839 }
11840
11841 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11842 /*
11843 * ID/IDREF registration will be done in xmlValidateElement below
11844 */
11845 ctxt->loadsubset |= XML_SKIP_IDS;
11846 }
11847
Daniel Veillard499cc922006-01-18 17:22:35 +000011848#ifdef LIBXML_HTML_ENABLED
11849 if (doc->type == XML_HTML_DOCUMENT_NODE)
11850 __htmlParseContent(ctxt);
11851 else
11852#endif
11853 xmlParseContent(ctxt);
11854
Daniel Veillard29b17482004-08-16 00:39:03 +000011855 nsPop(ctxt, nsnr);
11856 if ((RAW == '<') && (NXT(1) == '/')) {
11857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11858 } else if (RAW != 0) {
11859 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11860 }
11861 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11862 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11863 ctxt->wellFormed = 0;
11864 }
11865
11866 if (!ctxt->wellFormed) {
11867 if (ctxt->errNo == 0)
11868 ret = XML_ERR_INTERNAL_ERROR;
11869 else
11870 ret = (xmlParserErrors)ctxt->errNo;
11871 } else {
11872 ret = XML_ERR_OK;
11873 }
11874
11875 /*
11876 * Return the newly created nodeset after unlinking it from
11877 * the pseudo sibling.
11878 */
11879
11880 cur = fake->next;
11881 fake->next = NULL;
11882 node->last = fake;
11883
11884 if (cur != NULL) {
11885 cur->prev = NULL;
11886 }
11887
11888 *lst = cur;
11889
11890 while (cur != NULL) {
11891 cur->parent = NULL;
11892 cur = cur->next;
11893 }
11894
11895 xmlUnlinkNode(fake);
11896 xmlFreeNode(fake);
11897
11898
11899 if (ret != XML_ERR_OK) {
11900 xmlFreeNodeList(*lst);
11901 *lst = NULL;
11902 }
William M. Brackc3f81342004-10-03 01:22:44 +000011903
William M. Brackb7b54de2004-10-06 16:38:01 +000011904 if (doc->dict != NULL)
11905 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011906 xmlFreeParserCtxt(ctxt);
11907
11908 return(ret);
11909#else /* !SAX2 */
11910 return(XML_ERR_INTERNAL_ERROR);
11911#endif
11912}
11913
Daniel Veillard81273902003-09-30 00:43:48 +000011914#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011915/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011916 * xmlParseBalancedChunkMemoryRecover:
11917 * @doc: the document the chunk pertains to
11918 * @sax: the SAX handler bloc (possibly NULL)
11919 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11920 * @depth: Used for loop detection, use 0
11921 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11922 * @lst: the return value for the set of parsed nodes
11923 * @recover: return nodes even if the data is broken (use 0)
11924 *
11925 *
11926 * Parse a well-balanced chunk of an XML document
11927 * called by the parser
11928 * The allowed sequence for the Well Balanced Chunk is the one defined by
11929 * the content production in the XML grammar:
11930 *
11931 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11932 *
11933 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11934 * the parser error code otherwise
11935 *
11936 * In case recover is set to 1, the nodelist will not be empty even if
11937 * the parsed chunk is not well balanced.
11938 */
11939int
11940xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11941 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11942 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011943 xmlParserCtxtPtr ctxt;
11944 xmlDocPtr newDoc;
11945 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011946 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011947 int size;
11948 int ret = 0;
11949
11950 if (depth > 40) {
11951 return(XML_ERR_ENTITY_LOOP);
11952 }
11953
11954
Daniel Veillardcda96922001-08-21 10:56:31 +000011955 if (lst != NULL)
11956 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011957 if (string == NULL)
11958 return(-1);
11959
11960 size = xmlStrlen(string);
11961
11962 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11963 if (ctxt == NULL) return(-1);
11964 ctxt->userData = ctxt;
11965 if (sax != NULL) {
11966 oldsax = ctxt->sax;
11967 ctxt->sax = sax;
11968 if (user_data != NULL)
11969 ctxt->userData = user_data;
11970 }
11971 newDoc = xmlNewDoc(BAD_CAST "1.0");
11972 if (newDoc == NULL) {
11973 xmlFreeParserCtxt(ctxt);
11974 return(-1);
11975 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011976 if ((doc != NULL) && (doc->dict != NULL)) {
11977 xmlDictFree(ctxt->dict);
11978 ctxt->dict = doc->dict;
11979 xmlDictReference(ctxt->dict);
11980 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11981 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11982 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11983 ctxt->dictNames = 1;
11984 } else {
11985 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11986 }
Owen Taylor3473f882001-02-23 17:55:21 +000011987 if (doc != NULL) {
11988 newDoc->intSubset = doc->intSubset;
11989 newDoc->extSubset = doc->extSubset;
11990 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011991 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11992 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011993 if (sax != NULL)
11994 ctxt->sax = oldsax;
11995 xmlFreeParserCtxt(ctxt);
11996 newDoc->intSubset = NULL;
11997 newDoc->extSubset = NULL;
11998 xmlFreeDoc(newDoc);
11999 return(-1);
12000 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012001 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12002 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012003 if (doc == NULL) {
12004 ctxt->myDoc = newDoc;
12005 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012006 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012007 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012008 /* Ensure that doc has XML spec namespace */
12009 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12010 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012011 }
12012 ctxt->instate = XML_PARSER_CONTENT;
12013 ctxt->depth = depth;
12014
12015 /*
12016 * Doing validity checking on chunk doesn't make sense
12017 */
12018 ctxt->validate = 0;
12019 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012020 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012021
Daniel Veillardb39bc392002-10-26 19:29:51 +000012022 if ( doc != NULL ){
12023 content = doc->children;
12024 doc->children = NULL;
12025 xmlParseContent(ctxt);
12026 doc->children = content;
12027 }
12028 else {
12029 xmlParseContent(ctxt);
12030 }
Owen Taylor3473f882001-02-23 17:55:21 +000012031 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012032 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012033 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012034 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012035 }
12036 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012037 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012038 }
12039
12040 if (!ctxt->wellFormed) {
12041 if (ctxt->errNo == 0)
12042 ret = 1;
12043 else
12044 ret = ctxt->errNo;
12045 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012046 ret = 0;
12047 }
12048
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012049 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12050 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012051
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012052 /*
12053 * Return the newly created nodeset after unlinking it from
12054 * they pseudo parent.
12055 */
12056 cur = newDoc->children->children;
12057 *lst = cur;
12058 while (cur != NULL) {
12059 xmlSetTreeDoc(cur, doc);
12060 cur->parent = NULL;
12061 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012062 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012063 newDoc->children->children = NULL;
12064 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012065
Owen Taylor3473f882001-02-23 17:55:21 +000012066 if (sax != NULL)
12067 ctxt->sax = oldsax;
12068 xmlFreeParserCtxt(ctxt);
12069 newDoc->intSubset = NULL;
12070 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012071 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012072 xmlFreeDoc(newDoc);
12073
12074 return(ret);
12075}
12076
12077/**
12078 * xmlSAXParseEntity:
12079 * @sax: the SAX handler block
12080 * @filename: the filename
12081 *
12082 * parse an XML external entity out of context and build a tree.
12083 * It use the given SAX function block to handle the parsing callback.
12084 * If sax is NULL, fallback to the default DOM tree building routines.
12085 *
12086 * [78] extParsedEnt ::= TextDecl? content
12087 *
12088 * This correspond to a "Well Balanced" chunk
12089 *
12090 * Returns the resulting document tree
12091 */
12092
12093xmlDocPtr
12094xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12095 xmlDocPtr ret;
12096 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012097
12098 ctxt = xmlCreateFileParserCtxt(filename);
12099 if (ctxt == NULL) {
12100 return(NULL);
12101 }
12102 if (sax != NULL) {
12103 if (ctxt->sax != NULL)
12104 xmlFree(ctxt->sax);
12105 ctxt->sax = sax;
12106 ctxt->userData = NULL;
12107 }
12108
Owen Taylor3473f882001-02-23 17:55:21 +000012109 xmlParseExtParsedEnt(ctxt);
12110
12111 if (ctxt->wellFormed)
12112 ret = ctxt->myDoc;
12113 else {
12114 ret = NULL;
12115 xmlFreeDoc(ctxt->myDoc);
12116 ctxt->myDoc = NULL;
12117 }
12118 if (sax != NULL)
12119 ctxt->sax = NULL;
12120 xmlFreeParserCtxt(ctxt);
12121
12122 return(ret);
12123}
12124
12125/**
12126 * xmlParseEntity:
12127 * @filename: the filename
12128 *
12129 * parse an XML external entity out of context and build a tree.
12130 *
12131 * [78] extParsedEnt ::= TextDecl? content
12132 *
12133 * This correspond to a "Well Balanced" chunk
12134 *
12135 * Returns the resulting document tree
12136 */
12137
12138xmlDocPtr
12139xmlParseEntity(const char *filename) {
12140 return(xmlSAXParseEntity(NULL, filename));
12141}
Daniel Veillard81273902003-09-30 00:43:48 +000012142#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012143
12144/**
12145 * xmlCreateEntityParserCtxt:
12146 * @URL: the entity URL
12147 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012148 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012149 *
12150 * Create a parser context for an external entity
12151 * Automatic support for ZLIB/Compress compressed document is provided
12152 * by default if found at compile-time.
12153 *
12154 * Returns the new parser context or NULL
12155 */
12156xmlParserCtxtPtr
12157xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12158 const xmlChar *base) {
12159 xmlParserCtxtPtr ctxt;
12160 xmlParserInputPtr inputStream;
12161 char *directory = NULL;
12162 xmlChar *uri;
12163
12164 ctxt = xmlNewParserCtxt();
12165 if (ctxt == NULL) {
12166 return(NULL);
12167 }
12168
12169 uri = xmlBuildURI(URL, base);
12170
12171 if (uri == NULL) {
12172 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12173 if (inputStream == NULL) {
12174 xmlFreeParserCtxt(ctxt);
12175 return(NULL);
12176 }
12177
12178 inputPush(ctxt, inputStream);
12179
12180 if ((ctxt->directory == NULL) && (directory == NULL))
12181 directory = xmlParserGetDirectory((char *)URL);
12182 if ((ctxt->directory == NULL) && (directory != NULL))
12183 ctxt->directory = directory;
12184 } else {
12185 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12186 if (inputStream == NULL) {
12187 xmlFree(uri);
12188 xmlFreeParserCtxt(ctxt);
12189 return(NULL);
12190 }
12191
12192 inputPush(ctxt, inputStream);
12193
12194 if ((ctxt->directory == NULL) && (directory == NULL))
12195 directory = xmlParserGetDirectory((char *)uri);
12196 if ((ctxt->directory == NULL) && (directory != NULL))
12197 ctxt->directory = directory;
12198 xmlFree(uri);
12199 }
Owen Taylor3473f882001-02-23 17:55:21 +000012200 return(ctxt);
12201}
12202
12203/************************************************************************
12204 * *
12205 * Front ends when parsing from a file *
12206 * *
12207 ************************************************************************/
12208
12209/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012210 * xmlCreateURLParserCtxt:
12211 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012212 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012213 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012214 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012215 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012216 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012217 *
12218 * Returns the new parser context or NULL
12219 */
12220xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012221xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012222{
12223 xmlParserCtxtPtr ctxt;
12224 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012225 char *directory = NULL;
12226
Owen Taylor3473f882001-02-23 17:55:21 +000012227 ctxt = xmlNewParserCtxt();
12228 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012229 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012230 return(NULL);
12231 }
12232
Daniel Veillarddf292f72005-01-16 19:00:15 +000012233 if (options)
12234 xmlCtxtUseOptions(ctxt, options);
12235 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012236
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012237 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012238 if (inputStream == NULL) {
12239 xmlFreeParserCtxt(ctxt);
12240 return(NULL);
12241 }
12242
Owen Taylor3473f882001-02-23 17:55:21 +000012243 inputPush(ctxt, inputStream);
12244 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012245 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012246 if ((ctxt->directory == NULL) && (directory != NULL))
12247 ctxt->directory = directory;
12248
12249 return(ctxt);
12250}
12251
Daniel Veillard61b93382003-11-03 14:28:31 +000012252/**
12253 * xmlCreateFileParserCtxt:
12254 * @filename: the filename
12255 *
12256 * Create a parser context for a file content.
12257 * Automatic support for ZLIB/Compress compressed document is provided
12258 * by default if found at compile-time.
12259 *
12260 * Returns the new parser context or NULL
12261 */
12262xmlParserCtxtPtr
12263xmlCreateFileParserCtxt(const char *filename)
12264{
12265 return(xmlCreateURLParserCtxt(filename, 0));
12266}
12267
Daniel Veillard81273902003-09-30 00:43:48 +000012268#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012269/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012270 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012271 * @sax: the SAX handler block
12272 * @filename: the filename
12273 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12274 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012275 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012276 *
12277 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12278 * compressed document is provided by default if found at compile-time.
12279 * It use the given SAX function block to handle the parsing callback.
12280 * If sax is NULL, fallback to the default DOM tree building routines.
12281 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012282 * User data (void *) is stored within the parser context in the
12283 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012284 *
Owen Taylor3473f882001-02-23 17:55:21 +000012285 * Returns the resulting document tree
12286 */
12287
12288xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012289xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12290 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012291 xmlDocPtr ret;
12292 xmlParserCtxtPtr ctxt;
12293 char *directory = NULL;
12294
Daniel Veillard635ef722001-10-29 11:48:19 +000012295 xmlInitParser();
12296
Owen Taylor3473f882001-02-23 17:55:21 +000012297 ctxt = xmlCreateFileParserCtxt(filename);
12298 if (ctxt == NULL) {
12299 return(NULL);
12300 }
12301 if (sax != NULL) {
12302 if (ctxt->sax != NULL)
12303 xmlFree(ctxt->sax);
12304 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012305 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012306 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012307 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012308 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012309 }
Owen Taylor3473f882001-02-23 17:55:21 +000012310
12311 if ((ctxt->directory == NULL) && (directory == NULL))
12312 directory = xmlParserGetDirectory(filename);
12313 if ((ctxt->directory == NULL) && (directory != NULL))
12314 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12315
Daniel Veillarddad3f682002-11-17 16:47:27 +000012316 ctxt->recovery = recovery;
12317
Owen Taylor3473f882001-02-23 17:55:21 +000012318 xmlParseDocument(ctxt);
12319
William M. Brackc07329e2003-09-08 01:57:30 +000012320 if ((ctxt->wellFormed) || recovery) {
12321 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012322 if (ret != NULL) {
12323 if (ctxt->input->buf->compressed > 0)
12324 ret->compression = 9;
12325 else
12326 ret->compression = ctxt->input->buf->compressed;
12327 }
William M. Brackc07329e2003-09-08 01:57:30 +000012328 }
Owen Taylor3473f882001-02-23 17:55:21 +000012329 else {
12330 ret = NULL;
12331 xmlFreeDoc(ctxt->myDoc);
12332 ctxt->myDoc = NULL;
12333 }
12334 if (sax != NULL)
12335 ctxt->sax = NULL;
12336 xmlFreeParserCtxt(ctxt);
12337
12338 return(ret);
12339}
12340
12341/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012342 * xmlSAXParseFile:
12343 * @sax: the SAX handler block
12344 * @filename: the filename
12345 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12346 * documents
12347 *
12348 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12349 * compressed document is provided by default if found at compile-time.
12350 * It use the given SAX function block to handle the parsing callback.
12351 * If sax is NULL, fallback to the default DOM tree building routines.
12352 *
12353 * Returns the resulting document tree
12354 */
12355
12356xmlDocPtr
12357xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12358 int recovery) {
12359 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12360}
12361
12362/**
Owen Taylor3473f882001-02-23 17:55:21 +000012363 * xmlRecoverDoc:
12364 * @cur: a pointer to an array of xmlChar
12365 *
12366 * parse an XML in-memory document and build a tree.
12367 * In the case the document is not Well Formed, a tree is built anyway
12368 *
12369 * Returns the resulting document tree
12370 */
12371
12372xmlDocPtr
12373xmlRecoverDoc(xmlChar *cur) {
12374 return(xmlSAXParseDoc(NULL, cur, 1));
12375}
12376
12377/**
12378 * xmlParseFile:
12379 * @filename: the filename
12380 *
12381 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12382 * compressed document is provided by default if found at compile-time.
12383 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012384 * Returns the resulting document tree if the file was wellformed,
12385 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012386 */
12387
12388xmlDocPtr
12389xmlParseFile(const char *filename) {
12390 return(xmlSAXParseFile(NULL, filename, 0));
12391}
12392
12393/**
12394 * xmlRecoverFile:
12395 * @filename: the filename
12396 *
12397 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12398 * compressed document is provided by default if found at compile-time.
12399 * In the case the document is not Well Formed, a tree is built anyway
12400 *
12401 * Returns the resulting document tree
12402 */
12403
12404xmlDocPtr
12405xmlRecoverFile(const char *filename) {
12406 return(xmlSAXParseFile(NULL, filename, 1));
12407}
12408
12409
12410/**
12411 * xmlSetupParserForBuffer:
12412 * @ctxt: an XML parser context
12413 * @buffer: a xmlChar * buffer
12414 * @filename: a file name
12415 *
12416 * Setup the parser context to parse a new buffer; Clears any prior
12417 * contents from the parser context. The buffer parameter must not be
12418 * NULL, but the filename parameter can be
12419 */
12420void
12421xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12422 const char* filename)
12423{
12424 xmlParserInputPtr input;
12425
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012426 if ((ctxt == NULL) || (buffer == NULL))
12427 return;
12428
Owen Taylor3473f882001-02-23 17:55:21 +000012429 input = xmlNewInputStream(ctxt);
12430 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012431 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012432 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012433 return;
12434 }
12435
12436 xmlClearParserCtxt(ctxt);
12437 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012438 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012439 input->base = buffer;
12440 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012441 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012442 inputPush(ctxt, input);
12443}
12444
12445/**
12446 * xmlSAXUserParseFile:
12447 * @sax: a SAX handler
12448 * @user_data: The user data returned on SAX callbacks
12449 * @filename: a file name
12450 *
12451 * parse an XML file and call the given SAX handler routines.
12452 * Automatic support for ZLIB/Compress compressed document is provided
12453 *
12454 * Returns 0 in case of success or a error number otherwise
12455 */
12456int
12457xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12458 const char *filename) {
12459 int ret = 0;
12460 xmlParserCtxtPtr ctxt;
12461
12462 ctxt = xmlCreateFileParserCtxt(filename);
12463 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012464#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012465 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012466#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012467 xmlFree(ctxt->sax);
12468 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012469 xmlDetectSAX2(ctxt);
12470
Owen Taylor3473f882001-02-23 17:55:21 +000012471 if (user_data != NULL)
12472 ctxt->userData = user_data;
12473
12474 xmlParseDocument(ctxt);
12475
12476 if (ctxt->wellFormed)
12477 ret = 0;
12478 else {
12479 if (ctxt->errNo != 0)
12480 ret = ctxt->errNo;
12481 else
12482 ret = -1;
12483 }
12484 if (sax != NULL)
12485 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012486 if (ctxt->myDoc != NULL) {
12487 xmlFreeDoc(ctxt->myDoc);
12488 ctxt->myDoc = NULL;
12489 }
Owen Taylor3473f882001-02-23 17:55:21 +000012490 xmlFreeParserCtxt(ctxt);
12491
12492 return ret;
12493}
Daniel Veillard81273902003-09-30 00:43:48 +000012494#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012495
12496/************************************************************************
12497 * *
12498 * Front ends when parsing from memory *
12499 * *
12500 ************************************************************************/
12501
12502/**
12503 * xmlCreateMemoryParserCtxt:
12504 * @buffer: a pointer to a char array
12505 * @size: the size of the array
12506 *
12507 * Create a parser context for an XML in-memory document.
12508 *
12509 * Returns the new parser context or NULL
12510 */
12511xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012512xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012513 xmlParserCtxtPtr ctxt;
12514 xmlParserInputPtr input;
12515 xmlParserInputBufferPtr buf;
12516
12517 if (buffer == NULL)
12518 return(NULL);
12519 if (size <= 0)
12520 return(NULL);
12521
12522 ctxt = xmlNewParserCtxt();
12523 if (ctxt == NULL)
12524 return(NULL);
12525
Daniel Veillard53350552003-09-18 13:35:51 +000012526 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012527 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012528 if (buf == NULL) {
12529 xmlFreeParserCtxt(ctxt);
12530 return(NULL);
12531 }
Owen Taylor3473f882001-02-23 17:55:21 +000012532
12533 input = xmlNewInputStream(ctxt);
12534 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012535 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012536 xmlFreeParserCtxt(ctxt);
12537 return(NULL);
12538 }
12539
12540 input->filename = NULL;
12541 input->buf = buf;
12542 input->base = input->buf->buffer->content;
12543 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012544 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012545
12546 inputPush(ctxt, input);
12547 return(ctxt);
12548}
12549
Daniel Veillard81273902003-09-30 00:43:48 +000012550#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012551/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012552 * xmlSAXParseMemoryWithData:
12553 * @sax: the SAX handler block
12554 * @buffer: an pointer to a char array
12555 * @size: the size of the array
12556 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12557 * documents
12558 * @data: the userdata
12559 *
12560 * parse an XML in-memory block and use the given SAX function block
12561 * to handle the parsing callback. If sax is NULL, fallback to the default
12562 * DOM tree building routines.
12563 *
12564 * User data (void *) is stored within the parser context in the
12565 * context's _private member, so it is available nearly everywhere in libxml
12566 *
12567 * Returns the resulting document tree
12568 */
12569
12570xmlDocPtr
12571xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12572 int size, int recovery, void *data) {
12573 xmlDocPtr ret;
12574 xmlParserCtxtPtr ctxt;
12575
12576 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12577 if (ctxt == NULL) return(NULL);
12578 if (sax != NULL) {
12579 if (ctxt->sax != NULL)
12580 xmlFree(ctxt->sax);
12581 ctxt->sax = sax;
12582 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012583 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012584 if (data!=NULL) {
12585 ctxt->_private=data;
12586 }
12587
Daniel Veillardadba5f12003-04-04 16:09:01 +000012588 ctxt->recovery = recovery;
12589
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012590 xmlParseDocument(ctxt);
12591
12592 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12593 else {
12594 ret = NULL;
12595 xmlFreeDoc(ctxt->myDoc);
12596 ctxt->myDoc = NULL;
12597 }
12598 if (sax != NULL)
12599 ctxt->sax = NULL;
12600 xmlFreeParserCtxt(ctxt);
12601
12602 return(ret);
12603}
12604
12605/**
Owen Taylor3473f882001-02-23 17:55:21 +000012606 * xmlSAXParseMemory:
12607 * @sax: the SAX handler block
12608 * @buffer: an pointer to a char array
12609 * @size: the size of the array
12610 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12611 * documents
12612 *
12613 * parse an XML in-memory block and use the given SAX function block
12614 * to handle the parsing callback. If sax is NULL, fallback to the default
12615 * DOM tree building routines.
12616 *
12617 * Returns the resulting document tree
12618 */
12619xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012620xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12621 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012622 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012623}
12624
12625/**
12626 * xmlParseMemory:
12627 * @buffer: an pointer to a char array
12628 * @size: the size of the array
12629 *
12630 * parse an XML in-memory block and build a tree.
12631 *
12632 * Returns the resulting document tree
12633 */
12634
Daniel Veillard50822cb2001-07-26 20:05:51 +000012635xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012636 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12637}
12638
12639/**
12640 * xmlRecoverMemory:
12641 * @buffer: an pointer to a char array
12642 * @size: the size of the array
12643 *
12644 * parse an XML in-memory block and build a tree.
12645 * In the case the document is not Well Formed, a tree is built anyway
12646 *
12647 * Returns the resulting document tree
12648 */
12649
Daniel Veillard50822cb2001-07-26 20:05:51 +000012650xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012651 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12652}
12653
12654/**
12655 * xmlSAXUserParseMemory:
12656 * @sax: a SAX handler
12657 * @user_data: The user data returned on SAX callbacks
12658 * @buffer: an in-memory XML document input
12659 * @size: the length of the XML document in bytes
12660 *
12661 * A better SAX parsing routine.
12662 * parse an XML in-memory buffer and call the given SAX handler routines.
12663 *
12664 * Returns 0 in case of success or a error number otherwise
12665 */
12666int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012667 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012668 int ret = 0;
12669 xmlParserCtxtPtr ctxt;
12670 xmlSAXHandlerPtr oldsax = NULL;
12671
Daniel Veillard9e923512002-08-14 08:48:52 +000012672 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012673 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12674 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012675 oldsax = ctxt->sax;
12676 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012677 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012678 if (user_data != NULL)
12679 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012680
12681 xmlParseDocument(ctxt);
12682
12683 if (ctxt->wellFormed)
12684 ret = 0;
12685 else {
12686 if (ctxt->errNo != 0)
12687 ret = ctxt->errNo;
12688 else
12689 ret = -1;
12690 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012691 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012692 if (ctxt->myDoc != NULL) {
12693 xmlFreeDoc(ctxt->myDoc);
12694 ctxt->myDoc = NULL;
12695 }
Owen Taylor3473f882001-02-23 17:55:21 +000012696 xmlFreeParserCtxt(ctxt);
12697
12698 return ret;
12699}
Daniel Veillard81273902003-09-30 00:43:48 +000012700#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012701
12702/**
12703 * xmlCreateDocParserCtxt:
12704 * @cur: a pointer to an array of xmlChar
12705 *
12706 * Creates a parser context for an XML in-memory document.
12707 *
12708 * Returns the new parser context or NULL
12709 */
12710xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012711xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012712 int len;
12713
12714 if (cur == NULL)
12715 return(NULL);
12716 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012717 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012718}
12719
Daniel Veillard81273902003-09-30 00:43:48 +000012720#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012721/**
12722 * xmlSAXParseDoc:
12723 * @sax: the SAX handler block
12724 * @cur: a pointer to an array of xmlChar
12725 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12726 * documents
12727 *
12728 * parse an XML in-memory document and build a tree.
12729 * It use the given SAX function block to handle the parsing callback.
12730 * If sax is NULL, fallback to the default DOM tree building routines.
12731 *
12732 * Returns the resulting document tree
12733 */
12734
12735xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012736xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012737 xmlDocPtr ret;
12738 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012739 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012740
Daniel Veillard38936062004-11-04 17:45:11 +000012741 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012742
12743
12744 ctxt = xmlCreateDocParserCtxt(cur);
12745 if (ctxt == NULL) return(NULL);
12746 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012747 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012748 ctxt->sax = sax;
12749 ctxt->userData = NULL;
12750 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012751 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012752
12753 xmlParseDocument(ctxt);
12754 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12755 else {
12756 ret = NULL;
12757 xmlFreeDoc(ctxt->myDoc);
12758 ctxt->myDoc = NULL;
12759 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012760 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012761 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012762 xmlFreeParserCtxt(ctxt);
12763
12764 return(ret);
12765}
12766
12767/**
12768 * xmlParseDoc:
12769 * @cur: a pointer to an array of xmlChar
12770 *
12771 * parse an XML in-memory document and build a tree.
12772 *
12773 * Returns the resulting document tree
12774 */
12775
12776xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012777xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012778 return(xmlSAXParseDoc(NULL, cur, 0));
12779}
Daniel Veillard81273902003-09-30 00:43:48 +000012780#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012781
Daniel Veillard81273902003-09-30 00:43:48 +000012782#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012783/************************************************************************
12784 * *
12785 * Specific function to keep track of entities references *
12786 * and used by the XSLT debugger *
12787 * *
12788 ************************************************************************/
12789
12790static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12791
12792/**
12793 * xmlAddEntityReference:
12794 * @ent : A valid entity
12795 * @firstNode : A valid first node for children of entity
12796 * @lastNode : A valid last node of children entity
12797 *
12798 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12799 */
12800static void
12801xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12802 xmlNodePtr lastNode)
12803{
12804 if (xmlEntityRefFunc != NULL) {
12805 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12806 }
12807}
12808
12809
12810/**
12811 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012812 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012813 *
12814 * Set the function to call call back when a xml reference has been made
12815 */
12816void
12817xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12818{
12819 xmlEntityRefFunc = func;
12820}
Daniel Veillard81273902003-09-30 00:43:48 +000012821#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012822
12823/************************************************************************
12824 * *
12825 * Miscellaneous *
12826 * *
12827 ************************************************************************/
12828
12829#ifdef LIBXML_XPATH_ENABLED
12830#include <libxml/xpath.h>
12831#endif
12832
Daniel Veillardffa3c742005-07-21 13:24:09 +000012833extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012834static int xmlParserInitialized = 0;
12835
12836/**
12837 * xmlInitParser:
12838 *
12839 * Initialization function for the XML parser.
12840 * This is not reentrant. Call once before processing in case of
12841 * use in multithreaded programs.
12842 */
12843
12844void
12845xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012846 if (xmlParserInitialized != 0)
12847 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012848
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012849#ifdef LIBXML_THREAD_ENABLED
12850 __xmlGlobalInitMutexLock();
12851 if (xmlParserInitialized == 0) {
12852#endif
12853 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12854 (xmlGenericError == NULL))
12855 initGenericErrorDefaultFunc(NULL);
12856 xmlInitGlobals();
12857 xmlInitThreads();
12858 xmlInitMemory();
12859 xmlInitCharEncodingHandlers();
12860 xmlDefaultSAXHandlerInit();
12861 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012862#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012863 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012864#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012865#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012866 htmlInitAutoClose();
12867 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012868#endif
12869#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012870 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000012871#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000012872 xmlParserInitialized = 1;
12873#ifdef LIBXML_THREAD_ENABLED
12874 }
12875 __xmlGlobalInitMutexUnlock();
12876#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012877}
12878
12879/**
12880 * xmlCleanupParser:
12881 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012882 * Cleanup function for the XML library. It tries to reclaim all
12883 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012884 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012885 * function should not prevent reusing the library but one should
12886 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012887 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012888 */
12889
12890void
12891xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012892 if (!xmlParserInitialized)
12893 return;
12894
Owen Taylor3473f882001-02-23 17:55:21 +000012895 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012896#ifdef LIBXML_CATALOG_ENABLED
12897 xmlCatalogCleanup();
12898#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012899 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012900 xmlCleanupInputCallbacks();
12901#ifdef LIBXML_OUTPUT_ENABLED
12902 xmlCleanupOutputCallbacks();
12903#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012904#ifdef LIBXML_SCHEMAS_ENABLED
12905 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012906 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012907#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012908 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012909 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012910 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012911 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012912 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012913}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012914
12915/************************************************************************
12916 * *
12917 * New set (2.6.0) of simpler and more flexible APIs *
12918 * *
12919 ************************************************************************/
12920
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored.
 *
 * The macro expects a variable named "dict" to be visible at the point of
 * use. It expands to a single statement (do/while(0)) and must be followed
 * by a semicolon, so it is safe inside unbraced if/else constructs.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
12932
12933/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012934 * xmlCtxtReset:
12935 * @ctxt: an XML parser context
12936 *
12937 * Reset a parser context
12938 */
12939void
12940xmlCtxtReset(xmlParserCtxtPtr ctxt)
12941{
12942 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012943 xmlDictPtr dict;
12944
12945 if (ctxt == NULL)
12946 return;
12947
12948 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012949
12950 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12951 xmlFreeInputStream(input);
12952 }
12953 ctxt->inputNr = 0;
12954 ctxt->input = NULL;
12955
12956 ctxt->spaceNr = 0;
12957 ctxt->spaceTab[0] = -1;
12958 ctxt->space = &ctxt->spaceTab[0];
12959
12960
12961 ctxt->nodeNr = 0;
12962 ctxt->node = NULL;
12963
12964 ctxt->nameNr = 0;
12965 ctxt->name = NULL;
12966
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012967 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012968 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012969 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012970 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012971 DICT_FREE(ctxt->directory);
12972 ctxt->directory = NULL;
12973 DICT_FREE(ctxt->extSubURI);
12974 ctxt->extSubURI = NULL;
12975 DICT_FREE(ctxt->extSubSystem);
12976 ctxt->extSubSystem = NULL;
12977 if (ctxt->myDoc != NULL)
12978 xmlFreeDoc(ctxt->myDoc);
12979 ctxt->myDoc = NULL;
12980
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012981 ctxt->standalone = -1;
12982 ctxt->hasExternalSubset = 0;
12983 ctxt->hasPErefs = 0;
12984 ctxt->html = 0;
12985 ctxt->external = 0;
12986 ctxt->instate = XML_PARSER_START;
12987 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012988
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012989 ctxt->wellFormed = 1;
12990 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012991 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012992 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012993#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012994 ctxt->vctxt.userData = ctxt;
12995 ctxt->vctxt.error = xmlParserValidityError;
12996 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012997#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012998 ctxt->record_info = 0;
12999 ctxt->nbChars = 0;
13000 ctxt->checkIndex = 0;
13001 ctxt->inSubset = 0;
13002 ctxt->errNo = XML_ERR_OK;
13003 ctxt->depth = 0;
13004 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13005 ctxt->catalogs = NULL;
13006 xmlInitNodeInfoSeq(&ctxt->node_seq);
13007
13008 if (ctxt->attsDefault != NULL) {
13009 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13010 ctxt->attsDefault = NULL;
13011 }
13012 if (ctxt->attsSpecial != NULL) {
13013 xmlHashFree(ctxt->attsSpecial, NULL);
13014 ctxt->attsSpecial = NULL;
13015 }
13016
Daniel Veillard4432df22003-09-28 18:58:27 +000013017#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013018 if (ctxt->catalogs != NULL)
13019 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013020#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013021 if (ctxt->lastError.code != XML_ERR_OK)
13022 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013023}
13024
13025/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013026 * xmlCtxtResetPush:
13027 * @ctxt: an XML parser context
13028 * @chunk: a pointer to an array of chars
13029 * @size: number of chars in the array
13030 * @filename: an optional file name or URI
13031 * @encoding: the document encoding, or NULL
13032 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013033 * Reset a push parser context
13034 *
13035 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013036 */
13037int
13038xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13039 int size, const char *filename, const char *encoding)
13040{
13041 xmlParserInputPtr inputStream;
13042 xmlParserInputBufferPtr buf;
13043 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13044
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013045 if (ctxt == NULL)
13046 return(1);
13047
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013048 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13049 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13050
13051 buf = xmlAllocParserInputBuffer(enc);
13052 if (buf == NULL)
13053 return(1);
13054
13055 if (ctxt == NULL) {
13056 xmlFreeParserInputBuffer(buf);
13057 return(1);
13058 }
13059
13060 xmlCtxtReset(ctxt);
13061
13062 if (ctxt->pushTab == NULL) {
13063 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13064 sizeof(xmlChar *));
13065 if (ctxt->pushTab == NULL) {
13066 xmlErrMemory(ctxt, NULL);
13067 xmlFreeParserInputBuffer(buf);
13068 return(1);
13069 }
13070 }
13071
13072 if (filename == NULL) {
13073 ctxt->directory = NULL;
13074 } else {
13075 ctxt->directory = xmlParserGetDirectory(filename);
13076 }
13077
13078 inputStream = xmlNewInputStream(ctxt);
13079 if (inputStream == NULL) {
13080 xmlFreeParserInputBuffer(buf);
13081 return(1);
13082 }
13083
13084 if (filename == NULL)
13085 inputStream->filename = NULL;
13086 else
13087 inputStream->filename = (char *)
13088 xmlCanonicPath((const xmlChar *) filename);
13089 inputStream->buf = buf;
13090 inputStream->base = inputStream->buf->buffer->content;
13091 inputStream->cur = inputStream->buf->buffer->content;
13092 inputStream->end =
13093 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13094
13095 inputPush(ctxt, inputStream);
13096
13097 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13098 (ctxt->input->buf != NULL)) {
13099 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13100 int cur = ctxt->input->cur - ctxt->input->base;
13101
13102 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13103
13104 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13105 ctxt->input->cur = ctxt->input->base + cur;
13106 ctxt->input->end =
13107 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13108 use];
13109#ifdef DEBUG_PUSH
13110 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13111#endif
13112 }
13113
13114 if (encoding != NULL) {
13115 xmlCharEncodingHandlerPtr hdlr;
13116
13117 hdlr = xmlFindCharEncodingHandler(encoding);
13118 if (hdlr != NULL) {
13119 xmlSwitchToEncoding(ctxt, hdlr);
13120 } else {
13121 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13122 "Unsupported encoding %s\n", BAD_CAST encoding);
13123 }
13124 } else if (enc != XML_CHAR_ENCODING_NONE) {
13125 xmlSwitchEncoding(ctxt, enc);
13126 }
13127
13128 return(0);
13129}
13130
13131/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013132 * xmlCtxtUseOptions:
13133 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013134 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013135 *
13136 * Applies the options to the parser context
13137 *
13138 * Returns 0 in case of success, the set of unknown or unimplemented options
13139 * in case of error.
13140 */
13141int
13142xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13143{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013144 if (ctxt == NULL)
13145 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013146 if (options & XML_PARSE_RECOVER) {
13147 ctxt->recovery = 1;
13148 options -= XML_PARSE_RECOVER;
13149 } else
13150 ctxt->recovery = 0;
13151 if (options & XML_PARSE_DTDLOAD) {
13152 ctxt->loadsubset = XML_DETECT_IDS;
13153 options -= XML_PARSE_DTDLOAD;
13154 } else
13155 ctxt->loadsubset = 0;
13156 if (options & XML_PARSE_DTDATTR) {
13157 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13158 options -= XML_PARSE_DTDATTR;
13159 }
13160 if (options & XML_PARSE_NOENT) {
13161 ctxt->replaceEntities = 1;
13162 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13163 options -= XML_PARSE_NOENT;
13164 } else
13165 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013166 if (options & XML_PARSE_PEDANTIC) {
13167 ctxt->pedantic = 1;
13168 options -= XML_PARSE_PEDANTIC;
13169 } else
13170 ctxt->pedantic = 0;
13171 if (options & XML_PARSE_NOBLANKS) {
13172 ctxt->keepBlanks = 0;
13173 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13174 options -= XML_PARSE_NOBLANKS;
13175 } else
13176 ctxt->keepBlanks = 1;
13177 if (options & XML_PARSE_DTDVALID) {
13178 ctxt->validate = 1;
13179 if (options & XML_PARSE_NOWARNING)
13180 ctxt->vctxt.warning = NULL;
13181 if (options & XML_PARSE_NOERROR)
13182 ctxt->vctxt.error = NULL;
13183 options -= XML_PARSE_DTDVALID;
13184 } else
13185 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013186 if (options & XML_PARSE_NOWARNING) {
13187 ctxt->sax->warning = NULL;
13188 options -= XML_PARSE_NOWARNING;
13189 }
13190 if (options & XML_PARSE_NOERROR) {
13191 ctxt->sax->error = NULL;
13192 ctxt->sax->fatalError = NULL;
13193 options -= XML_PARSE_NOERROR;
13194 }
Daniel Veillard81273902003-09-30 00:43:48 +000013195#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013196 if (options & XML_PARSE_SAX1) {
13197 ctxt->sax->startElement = xmlSAX2StartElement;
13198 ctxt->sax->endElement = xmlSAX2EndElement;
13199 ctxt->sax->startElementNs = NULL;
13200 ctxt->sax->endElementNs = NULL;
13201 ctxt->sax->initialized = 1;
13202 options -= XML_PARSE_SAX1;
13203 }
Daniel Veillard81273902003-09-30 00:43:48 +000013204#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013205 if (options & XML_PARSE_NODICT) {
13206 ctxt->dictNames = 0;
13207 options -= XML_PARSE_NODICT;
13208 } else {
13209 ctxt->dictNames = 1;
13210 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013211 if (options & XML_PARSE_NOCDATA) {
13212 ctxt->sax->cdataBlock = NULL;
13213 options -= XML_PARSE_NOCDATA;
13214 }
13215 if (options & XML_PARSE_NSCLEAN) {
13216 ctxt->options |= XML_PARSE_NSCLEAN;
13217 options -= XML_PARSE_NSCLEAN;
13218 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013219 if (options & XML_PARSE_NONET) {
13220 ctxt->options |= XML_PARSE_NONET;
13221 options -= XML_PARSE_NONET;
13222 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013223 if (options & XML_PARSE_COMPACT) {
13224 ctxt->options |= XML_PARSE_COMPACT;
13225 options -= XML_PARSE_COMPACT;
13226 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013227 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013228 return (options);
13229}
13230
13231/**
13232 * xmlDoRead:
13233 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013234 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013235 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013236 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013237 * @reuse: keep the context for reuse
13238 *
13239 * Common front-end for the xmlRead functions
13240 *
13241 * Returns the resulting document tree or NULL
13242 */
13243static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013244xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13245 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246{
13247 xmlDocPtr ret;
13248
13249 xmlCtxtUseOptions(ctxt, options);
13250 if (encoding != NULL) {
13251 xmlCharEncodingHandlerPtr hdlr;
13252
13253 hdlr = xmlFindCharEncodingHandler(encoding);
13254 if (hdlr != NULL)
13255 xmlSwitchToEncoding(ctxt, hdlr);
13256 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013257 if ((URL != NULL) && (ctxt->input != NULL) &&
13258 (ctxt->input->filename == NULL))
13259 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013260 xmlParseDocument(ctxt);
13261 if ((ctxt->wellFormed) || ctxt->recovery)
13262 ret = ctxt->myDoc;
13263 else {
13264 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013265 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013266 xmlFreeDoc(ctxt->myDoc);
13267 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013268 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013269 ctxt->myDoc = NULL;
13270 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013271 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013272 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013273
13274 return (ret);
13275}
13276
13277/**
13278 * xmlReadDoc:
13279 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013280 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013281 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013282 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013283 *
13284 * parse an XML in-memory document and build a tree.
13285 *
13286 * Returns the resulting document tree
13287 */
13288xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013289xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013290{
13291 xmlParserCtxtPtr ctxt;
13292
13293 if (cur == NULL)
13294 return (NULL);
13295
13296 ctxt = xmlCreateDocParserCtxt(cur);
13297 if (ctxt == NULL)
13298 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013299 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013300}
13301
13302/**
13303 * xmlReadFile:
13304 * @filename: a file or URL
13305 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013306 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013307 *
13308 * parse an XML file from the filesystem or the network.
13309 *
13310 * Returns the resulting document tree
13311 */
13312xmlDocPtr
13313xmlReadFile(const char *filename, const char *encoding, int options)
13314{
13315 xmlParserCtxtPtr ctxt;
13316
Daniel Veillard61b93382003-11-03 14:28:31 +000013317 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013318 if (ctxt == NULL)
13319 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013320 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013321}
13322
13323/**
13324 * xmlReadMemory:
13325 * @buffer: a pointer to a char array
13326 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013327 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013328 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013329 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013330 *
13331 * parse an XML in-memory document and build a tree.
13332 *
13333 * Returns the resulting document tree
13334 */
13335xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013336xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013337{
13338 xmlParserCtxtPtr ctxt;
13339
13340 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13341 if (ctxt == NULL)
13342 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013343 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013344}
13345
13346/**
13347 * xmlReadFd:
13348 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013349 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013350 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013351 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013352 *
13353 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013354 * NOTE that the file descriptor will not be closed when the
13355 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013356 *
13357 * Returns the resulting document tree
13358 */
13359xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013360xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013361{
13362 xmlParserCtxtPtr ctxt;
13363 xmlParserInputBufferPtr input;
13364 xmlParserInputPtr stream;
13365
13366 if (fd < 0)
13367 return (NULL);
13368
13369 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13370 if (input == NULL)
13371 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013372 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013373 ctxt = xmlNewParserCtxt();
13374 if (ctxt == NULL) {
13375 xmlFreeParserInputBuffer(input);
13376 return (NULL);
13377 }
13378 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13379 if (stream == NULL) {
13380 xmlFreeParserInputBuffer(input);
13381 xmlFreeParserCtxt(ctxt);
13382 return (NULL);
13383 }
13384 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013385 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013386}
13387
13388/**
13389 * xmlReadIO:
13390 * @ioread: an I/O read function
13391 * @ioclose: an I/O close function
13392 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013393 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013394 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013395 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013396 *
13397 * parse an XML document from I/O functions and source and build a tree.
13398 *
13399 * Returns the resulting document tree
13400 */
13401xmlDocPtr
13402xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013403 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013404{
13405 xmlParserCtxtPtr ctxt;
13406 xmlParserInputBufferPtr input;
13407 xmlParserInputPtr stream;
13408
13409 if (ioread == NULL)
13410 return (NULL);
13411
13412 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13413 XML_CHAR_ENCODING_NONE);
13414 if (input == NULL)
13415 return (NULL);
13416 ctxt = xmlNewParserCtxt();
13417 if (ctxt == NULL) {
13418 xmlFreeParserInputBuffer(input);
13419 return (NULL);
13420 }
13421 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13422 if (stream == NULL) {
13423 xmlFreeParserInputBuffer(input);
13424 xmlFreeParserCtxt(ctxt);
13425 return (NULL);
13426 }
13427 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013428 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013429}
13430
13431/**
13432 * xmlCtxtReadDoc:
13433 * @ctxt: an XML parser context
13434 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013435 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013436 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013437 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013438 *
13439 * parse an XML in-memory document and build a tree.
13440 * This reuses the existing @ctxt parser context
13441 *
13442 * Returns the resulting document tree
13443 */
13444xmlDocPtr
13445xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013446 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013447{
13448 xmlParserInputPtr stream;
13449
13450 if (cur == NULL)
13451 return (NULL);
13452 if (ctxt == NULL)
13453 return (NULL);
13454
13455 xmlCtxtReset(ctxt);
13456
13457 stream = xmlNewStringInputStream(ctxt, cur);
13458 if (stream == NULL) {
13459 return (NULL);
13460 }
13461 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013462 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013463}
13464
13465/**
13466 * xmlCtxtReadFile:
13467 * @ctxt: an XML parser context
13468 * @filename: a file or URL
13469 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013470 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013471 *
13472 * parse an XML file from the filesystem or the network.
13473 * This reuses the existing @ctxt parser context
13474 *
13475 * Returns the resulting document tree
13476 */
13477xmlDocPtr
13478xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13479 const char *encoding, int options)
13480{
13481 xmlParserInputPtr stream;
13482
13483 if (filename == NULL)
13484 return (NULL);
13485 if (ctxt == NULL)
13486 return (NULL);
13487
13488 xmlCtxtReset(ctxt);
13489
Daniel Veillard29614c72004-11-26 10:47:26 +000013490 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013491 if (stream == NULL) {
13492 return (NULL);
13493 }
13494 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013495 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013496}
13497
13498/**
13499 * xmlCtxtReadMemory:
13500 * @ctxt: an XML parser context
13501 * @buffer: a pointer to a char array
13502 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013503 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013504 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013505 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013506 *
13507 * parse an XML in-memory document and build a tree.
13508 * This reuses the existing @ctxt parser context
13509 *
13510 * Returns the resulting document tree
13511 */
13512xmlDocPtr
13513xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013514 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013515{
13516 xmlParserInputBufferPtr input;
13517 xmlParserInputPtr stream;
13518
13519 if (ctxt == NULL)
13520 return (NULL);
13521 if (buffer == NULL)
13522 return (NULL);
13523
13524 xmlCtxtReset(ctxt);
13525
13526 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13527 if (input == NULL) {
13528 return(NULL);
13529 }
13530
13531 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13532 if (stream == NULL) {
13533 xmlFreeParserInputBuffer(input);
13534 return(NULL);
13535 }
13536
13537 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013538 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013539}
13540
13541/**
13542 * xmlCtxtReadFd:
13543 * @ctxt: an XML parser context
13544 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013545 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013546 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013547 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013548 *
13549 * parse an XML from a file descriptor and build a tree.
13550 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013551 * NOTE that the file descriptor will not be closed when the
13552 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013553 *
13554 * Returns the resulting document tree
13555 */
13556xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013557xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13558 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013559{
13560 xmlParserInputBufferPtr input;
13561 xmlParserInputPtr stream;
13562
13563 if (fd < 0)
13564 return (NULL);
13565 if (ctxt == NULL)
13566 return (NULL);
13567
13568 xmlCtxtReset(ctxt);
13569
13570
13571 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13572 if (input == NULL)
13573 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013574 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013575 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13576 if (stream == NULL) {
13577 xmlFreeParserInputBuffer(input);
13578 return (NULL);
13579 }
13580 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013581 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013582}
13583
13584/**
13585 * xmlCtxtReadIO:
13586 * @ctxt: an XML parser context
13587 * @ioread: an I/O read function
13588 * @ioclose: an I/O close function
13589 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013590 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013591 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013592 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013593 *
13594 * parse an XML document from I/O functions and source and build a tree.
13595 * This reuses the existing @ctxt parser context
13596 *
13597 * Returns the resulting document tree
13598 */
13599xmlDocPtr
13600xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13601 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013602 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013603 const char *encoding, int options)
13604{
13605 xmlParserInputBufferPtr input;
13606 xmlParserInputPtr stream;
13607
13608 if (ioread == NULL)
13609 return (NULL);
13610 if (ctxt == NULL)
13611 return (NULL);
13612
13613 xmlCtxtReset(ctxt);
13614
13615 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13616 XML_CHAR_ENCODING_NONE);
13617 if (input == NULL)
13618 return (NULL);
13619 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13620 if (stream == NULL) {
13621 xmlFreeParserInputBuffer(input);
13622 return (NULL);
13623 }
13624 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013625 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013626}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013627
13628#define bottom_parser
13629#include "elfgcchack.h"