blob: 5f561ee85774fe77b9e530fc532c1fb9b53735da [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary ceiling on the depth of the XML documents we accept to
 * process. It acts as a safety boundary; it is not a limitation of
 * the parser itself.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated. Both the strings and the pointer slots
 * are const: this is a read-only lookup table.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000584/************************************************************************
585 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000586 * Library wide options *
587 * *
588 ************************************************************************/
589
590/**
591 * xmlHasFeature:
592 * @feature: the feature to be examined
593 *
594 * Examines if the library has been compiled with a given feature.
595 *
 * Returns zero (0) if the feature does not exist or an unknown
 * feature is requested, non-zero otherwise.
599 */
600int
601xmlHasFeature(xmlFeature feature)
602{
603 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000604 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000605#ifdef LIBXML_THREAD_ENABLED
606 return(1);
607#else
608 return(0);
609#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000610 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000611#ifdef LIBXML_TREE_ENABLED
612 return(1);
613#else
614 return(0);
615#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000616 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000617#ifdef LIBXML_OUTPUT_ENABLED
618 return(1);
619#else
620 return(0);
621#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000622 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000623#ifdef LIBXML_PUSH_ENABLED
624 return(1);
625#else
626 return(0);
627#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000628 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000629#ifdef LIBXML_READER_ENABLED
630 return(1);
631#else
632 return(0);
633#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000634 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000635#ifdef LIBXML_PATTERN_ENABLED
636 return(1);
637#else
638 return(0);
639#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000640 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000641#ifdef LIBXML_WRITER_ENABLED
642 return(1);
643#else
644 return(0);
645#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000646 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000647#ifdef LIBXML_SAX1_ENABLED
648 return(1);
649#else
650 return(0);
651#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000652 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000653#ifdef LIBXML_FTP_ENABLED
654 return(1);
655#else
656 return(0);
657#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000658 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000659#ifdef LIBXML_HTTP_ENABLED
660 return(1);
661#else
662 return(0);
663#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_VALID_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_HTML_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_LEGACY_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_C14N_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_CATALOG_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_XPATH_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_XPTR_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_XINCLUDE_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_ICONV_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_ISO8859X_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_UNICODE_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_REGEXP_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_AUTOMATA_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_EXPR_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_SCHEMAS_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_SCHEMATRON_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_MODULES_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_DEBUG_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef DEBUG_MEMORY_LOCATION
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_DEBUG_RUNTIME
780 return(1);
781#else
782 return(0);
783#endif
784 default:
785 break;
786 }
787 return(0);
788}
789
790/************************************************************************
791 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000792 * SAX2 defaulted attributes handling *
793 * *
794 ************************************************************************/
795
796/**
797 * xmlDetectSAX2:
798 * @ctxt: an XML parser context
799 *
800 * Do the SAX2 detection and specific intialization
801 */
802static void
803xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
804 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000805#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000806 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
807 ((ctxt->sax->startElementNs != NULL) ||
808 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000809#else
810 ctxt->sax2 = 1;
811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000812
813 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
814 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
815 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000816 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
817 (ctxt->str_xml_ns == NULL)) {
818 xmlErrMemory(ctxt, NULL);
819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000820}
821
Daniel Veillarde57ec792003-09-10 10:50:59 +0000822typedef struct _xmlDefAttrs xmlDefAttrs;
823typedef xmlDefAttrs *xmlDefAttrsPtr;
824struct _xmlDefAttrs {
825 int nbAttrs; /* number of defaulted attributes on that element */
826 int maxAttrs; /* the size of the array */
827 const xmlChar *values[4]; /* array of localname/prefix/values */
828};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000829
830/**
831 * xmlAddDefAttrs:
832 * @ctxt: an XML parser context
833 * @fullname: the element fullname
834 * @fullattr: the attribute fullname
835 * @value: the attribute value
836 *
837 * Add a defaulted attribute for an element
838 */
839static void
840xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
841 const xmlChar *fullname,
842 const xmlChar *fullattr,
843 const xmlChar *value) {
844 xmlDefAttrsPtr defaults;
845 int len;
846 const xmlChar *name;
847 const xmlChar *prefix;
848
849 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000850 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000851 if (ctxt->attsDefault == NULL)
852 goto mem_error;
853 }
854
855 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000856 * split the element name into prefix:localname , the string found
857 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 */
859 name = xmlSplitQName3(fullname, &len);
860 if (name == NULL) {
861 name = xmlDictLookup(ctxt->dict, fullname, -1);
862 prefix = NULL;
863 } else {
864 name = xmlDictLookup(ctxt->dict, name, -1);
865 prefix = xmlDictLookup(ctxt->dict, fullname, len);
866 }
867
868 /*
869 * make sure there is some storage
870 */
871 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
872 if (defaults == NULL) {
873 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000874 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000875 if (defaults == NULL)
876 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000877 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000878 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000879 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
880 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000881 xmlDefAttrsPtr temp;
882
883 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000884 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000885 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000887 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888 defaults->maxAttrs *= 2;
889 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
890 }
891
892 /*
Daniel Veillard8874b942005-08-25 13:19:21 +0000893 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894 * are within the DTD and hen not associated to namespace names.
895 */
896 name = xmlSplitQName3(fullattr, &len);
897 if (name == NULL) {
898 name = xmlDictLookup(ctxt->dict, fullattr, -1);
899 prefix = NULL;
900 } else {
901 name = xmlDictLookup(ctxt->dict, name, -1);
902 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
903 }
904
905 defaults->values[4 * defaults->nbAttrs] = name;
906 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
907 /* intern the string and precompute the end */
908 len = xmlStrlen(value);
909 value = xmlDictLookup(ctxt->dict, value, len);
910 defaults->values[4 * defaults->nbAttrs + 2] = value;
911 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
912 defaults->nbAttrs++;
913
914 return;
915
916mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000917 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 return;
919}
920
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000921/**
922 * xmlAddSpecialAttr:
923 * @ctxt: an XML parser context
924 * @fullname: the element fullname
925 * @fullattr: the attribute fullname
926 * @type: the attribute type
927 *
928 * Register that this attribute is not CDATA
929 */
930static void
931xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
932 const xmlChar *fullname,
933 const xmlChar *fullattr,
934 int type)
935{
936 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000937 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000938 if (ctxt->attsSpecial == NULL)
939 goto mem_error;
940 }
941
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000942 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
943 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000944 return;
945
946mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000947 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000948 return;
949}
950
Daniel Veillard4432df22003-09-28 18:58:27 +0000951/**
952 * xmlCheckLanguageID:
953 * @lang: pointer to the string value
954 *
955 * Checks that the value conforms to the LanguageID production:
956 *
957 * NOTE: this is somewhat deprecated, those productions were removed from
958 * the XML Second edition.
959 *
960 * [33] LanguageID ::= Langcode ('-' Subcode)*
961 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
962 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
963 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
964 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
965 * [38] Subcode ::= ([a-z] | [A-Z])+
966 *
967 * Returns 1 if correct 0 otherwise
968 **/
969int
970xmlCheckLanguageID(const xmlChar * lang)
971{
972 const xmlChar *cur = lang;
973
974 if (cur == NULL)
975 return (0);
976 if (((cur[0] == 'i') && (cur[1] == '-')) ||
977 ((cur[0] == 'I') && (cur[1] == '-'))) {
978 /*
979 * IANA code
980 */
981 cur += 2;
982 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
983 ((cur[0] >= 'a') && (cur[0] <= 'z')))
984 cur++;
985 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
986 ((cur[0] == 'X') && (cur[1] == '-'))) {
987 /*
988 * User code
989 */
990 cur += 2;
991 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
992 ((cur[0] >= 'a') && (cur[0] <= 'z')))
993 cur++;
994 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
995 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
996 /*
997 * ISO639
998 */
999 cur++;
1000 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1001 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1002 cur++;
1003 else
1004 return (0);
1005 } else
1006 return (0);
1007 while (cur[0] != 0) { /* non input consuming */
1008 if (cur[0] != '-')
1009 return (0);
1010 cur++;
1011 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1012 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1013 cur++;
1014 else
1015 return (0);
1016 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1017 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1018 cur++;
1019 }
1020 return (1);
1021}
1022
Owen Taylor3473f882001-02-23 17:55:21 +00001023/************************************************************************
1024 * *
1025 * Parser stacks related functions and macros *
1026 * *
1027 ************************************************************************/
1028
1029xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1030 const xmlChar ** str);
1031
Daniel Veillard0fb18932003-09-07 09:14:37 +00001032#ifdef SAX2
1033/**
1034 * nsPush:
1035 * @ctxt: an XML parser context
1036 * @prefix: the namespace prefix or NULL
1037 * @URL: the namespace name
1038 *
1039 * Pushes a new parser namespace on top of the ns stack
1040 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001041 * Returns -1 in case of error, -2 if the namespace should be discarded
1042 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001043 */
1044static int
1045nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1046{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001047 if (ctxt->options & XML_PARSE_NSCLEAN) {
1048 int i;
1049 for (i = 0;i < ctxt->nsNr;i += 2) {
1050 if (ctxt->nsTab[i] == prefix) {
1051 /* in scope */
1052 if (ctxt->nsTab[i + 1] == URL)
1053 return(-2);
1054 /* out of scope keep it */
1055 break;
1056 }
1057 }
1058 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001059 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1060 ctxt->nsMax = 10;
1061 ctxt->nsNr = 0;
1062 ctxt->nsTab = (const xmlChar **)
1063 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1064 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001065 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001066 ctxt->nsMax = 0;
1067 return (-1);
1068 }
1069 } else if (ctxt->nsNr >= ctxt->nsMax) {
1070 ctxt->nsMax *= 2;
1071 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +00001072 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +00001073 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1074 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001075 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001076 ctxt->nsMax /= 2;
1077 return (-1);
1078 }
1079 }
1080 ctxt->nsTab[ctxt->nsNr++] = prefix;
1081 ctxt->nsTab[ctxt->nsNr++] = URL;
1082 return (ctxt->nsNr);
1083}
1084/**
1085 * nsPop:
1086 * @ctxt: an XML parser context
1087 * @nr: the number to pop
1088 *
1089 * Pops the top @nr parser prefix/namespace from the ns stack
1090 *
1091 * Returns the number of namespaces removed
1092 */
1093static int
1094nsPop(xmlParserCtxtPtr ctxt, int nr)
1095{
1096 int i;
1097
1098 if (ctxt->nsTab == NULL) return(0);
1099 if (ctxt->nsNr < nr) {
1100 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1101 nr = ctxt->nsNr;
1102 }
1103 if (ctxt->nsNr <= 0)
1104 return (0);
1105
1106 for (i = 0;i < nr;i++) {
1107 ctxt->nsNr--;
1108 ctxt->nsTab[ctxt->nsNr] = NULL;
1109 }
1110 return(nr);
1111}
1112#endif
1113
1114static int
1115xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1116 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001118 int maxatts;
1119
1120 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001121 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001122 atts = (const xmlChar **)
1123 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001125 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1127 if (attallocs == NULL) goto mem_error;
1128 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001129 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130 } else if (nr + 5 > ctxt->maxatts) {
1131 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001132 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1133 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001135 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001136 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1137 (maxatts / 5) * sizeof(int));
1138 if (attallocs == NULL) goto mem_error;
1139 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001140 ctxt->maxatts = maxatts;
1141 }
1142 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001144 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001146}
1147
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001148/**
1149 * inputPush:
1150 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001151 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001152 *
1153 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001154 *
1155 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001156 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001157int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1159{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001160 if ((ctxt == NULL) || (value == NULL))
1161 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001162 if (ctxt->inputNr >= ctxt->inputMax) {
1163 ctxt->inputMax *= 2;
1164 ctxt->inputTab =
1165 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1166 ctxt->inputMax *
1167 sizeof(ctxt->inputTab[0]));
1168 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001169 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001170 return (0);
1171 }
1172 }
1173 ctxt->inputTab[ctxt->inputNr] = value;
1174 ctxt->input = value;
1175 return (ctxt->inputNr++);
1176}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001177/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001179 * @ctxt: an XML parser context
1180 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001182 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001183 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001184 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001185xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001186inputPop(xmlParserCtxtPtr ctxt)
1187{
1188 xmlParserInputPtr ret;
1189
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001190 if (ctxt == NULL)
1191 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001192 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001193 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001194 ctxt->inputNr--;
1195 if (ctxt->inputNr > 0)
1196 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1197 else
1198 ctxt->input = NULL;
1199 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001200 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001201 return (ret);
1202}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001203/**
1204 * nodePush:
1205 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001206 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001207 *
1208 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001209 *
1210 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001211 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001212int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001213nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1214{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001215 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001216 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001217 xmlNodePtr *tmp;
1218
1219 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1220 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001221 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001222 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001223 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001224 return (0);
1225 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001226 ctxt->nodeTab = tmp;
1227 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001228 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001229 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001230 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001231 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1232 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001233 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001234 return(0);
1235 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001236 ctxt->nodeTab[ctxt->nodeNr] = value;
1237 ctxt->node = value;
1238 return (ctxt->nodeNr++);
1239}
1240/**
1241 * nodePop:
1242 * @ctxt: an XML parser context
1243 *
1244 * Pops the top element node from the node stack
1245 *
1246 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001247 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001248xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001249nodePop(xmlParserCtxtPtr ctxt)
1250{
1251 xmlNodePtr ret;
1252
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001253 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001254 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001255 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001256 ctxt->nodeNr--;
1257 if (ctxt->nodeNr > 0)
1258 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1259 else
1260 ctxt->node = NULL;
1261 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001262 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001263 return (ret);
1264}
Daniel Veillarda2351322004-06-27 12:08:10 +00001265
1266#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001267/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 * nameNsPush:
1269 * @ctxt: an XML parser context
1270 * @value: the element name
1271 * @prefix: the element prefix
1272 * @URI: the element namespace name
1273 *
1274 * Pushes a new element name/prefix/URL on top of the name stack
1275 *
1276 * Returns -1 in case of error, the index in the stack otherwise
1277 */
1278static int
1279nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1280 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1281{
1282 if (ctxt->nameNr >= ctxt->nameMax) {
1283 const xmlChar * *tmp;
1284 void **tmp2;
1285 ctxt->nameMax *= 2;
1286 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1287 ctxt->nameMax *
1288 sizeof(ctxt->nameTab[0]));
1289 if (tmp == NULL) {
1290 ctxt->nameMax /= 2;
1291 goto mem_error;
1292 }
1293 ctxt->nameTab = tmp;
1294 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1295 ctxt->nameMax * 3 *
1296 sizeof(ctxt->pushTab[0]));
1297 if (tmp2 == NULL) {
1298 ctxt->nameMax /= 2;
1299 goto mem_error;
1300 }
1301 ctxt->pushTab = tmp2;
1302 }
1303 ctxt->nameTab[ctxt->nameNr] = value;
1304 ctxt->name = value;
1305 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1306 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001307 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001308 return (ctxt->nameNr++);
1309mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001310 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 return (-1);
1312}
1313/**
1314 * nameNsPop:
1315 * @ctxt: an XML parser context
1316 *
1317 * Pops the top element/prefix/URI name from the name stack
1318 *
1319 * Returns the name just removed
1320 */
1321static const xmlChar *
1322nameNsPop(xmlParserCtxtPtr ctxt)
1323{
1324 const xmlChar *ret;
1325
1326 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001327 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001328 ctxt->nameNr--;
1329 if (ctxt->nameNr > 0)
1330 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1331 else
1332 ctxt->name = NULL;
1333 ret = ctxt->nameTab[ctxt->nameNr];
1334 ctxt->nameTab[ctxt->nameNr] = NULL;
1335 return (ret);
1336}
Daniel Veillarda2351322004-06-27 12:08:10 +00001337#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338
1339/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001340 * namePush:
1341 * @ctxt: an XML parser context
1342 * @value: the element name
1343 *
1344 * Pushes a new element name on top of the name stack
1345 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001346 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001347 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001348int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001349namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001350{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001351 if (ctxt == NULL) return (-1);
1352
Daniel Veillard1c732d22002-11-30 11:22:59 +00001353 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001354 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001355 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001356 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001357 ctxt->nameMax *
1358 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001359 if (tmp == NULL) {
1360 ctxt->nameMax /= 2;
1361 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001364 }
1365 ctxt->nameTab[ctxt->nameNr] = value;
1366 ctxt->name = value;
1367 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001368mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001369 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001371}
1372/**
1373 * namePop:
1374 * @ctxt: an XML parser context
1375 *
1376 * Pops the top element name from the name stack
1377 *
1378 * Returns the name just removed
1379 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001380const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001381namePop(xmlParserCtxtPtr ctxt)
1382{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001383 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001384
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001385 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1386 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001387 ctxt->nameNr--;
1388 if (ctxt->nameNr > 0)
1389 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1390 else
1391 ctxt->name = NULL;
1392 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001393 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001394 return (ret);
1395}
Owen Taylor3473f882001-02-23 17:55:21 +00001396
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001397static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001398 if (ctxt->spaceNr >= ctxt->spaceMax) {
1399 ctxt->spaceMax *= 2;
1400 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1401 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1402 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001403 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001404 return(0);
1405 }
1406 }
1407 ctxt->spaceTab[ctxt->spaceNr] = val;
1408 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1409 return(ctxt->spaceNr++);
1410}
1411
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001412static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001413 int ret;
1414 if (ctxt->spaceNr <= 0) return(0);
1415 ctxt->spaceNr--;
1416 if (ctxt->spaceNr > 0)
1417 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1418 else
1419 ctxt->space = NULL;
1420 ret = ctxt->spaceTab[ctxt->spaceNr];
1421 ctxt->spaceTab[ctxt->spaceNr] = -1;
1422 return(ret);
1423}
1424
1425/*
1426 * Macros for accessing the content. Those should be used only by the parser,
1427 * and not exported.
1428 *
1429 * Dirty macros, i.e. one often need to make assumption on the context to
1430 * use them
1431 *
1432 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1433 * To be used with extreme caution since operations consuming
1434 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser only to
 *           compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1439 * RAW same as CUR but in the input buffer, bypass any token
1440 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
1443 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001444 * strings without newlines within the parser.
1445 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1446 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001447 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1448 *
1449 * NEXT Skip to the next character, this does the proper decoding
1450 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001451 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001452 * CUR_CHAR(l) returns the current unicode character (int), set l
1453 * to the number of xmlChars used for the encoding [0-5].
1454 * CUR_SCHAR same but operate on a string instead of the context
1455 * COPY_BUF copy the current unicode char to the target buffer, increment
1456 * the index
1457 * GROW, SHRINK handling of input buffers
1458 */
1459
/*
 * Direct accessors on the current input of the parser context `ctxt`,
 * which must be in scope where they are expanded.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s are c1 ... cn.
 * Bytes are compared left to right and the comparison stops at the first
 * mismatch.  Every argument is parenthesized so that any expression can
 * be passed; s is expanded once per byte compared and so should be free
 * of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1482
/*
 * SKIP(val): move the cursor of the current input val xmlChars forward,
 * adding val to the character count and to the column. No line accounting
 * is done. After the move:
 *  - a '%' under the cursor is handed to xmlParserHandlePEReference();
 *  - if the cursor is on a 0 and the input cannot be grown, the input is
 *    popped with xmlPopInput().
 * val is expanded three times; it must not have side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
    if (*ctxt->input->cur == 0) {					\
	if (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)		\
	    xmlPopInput(ctxt);						\
    }									\
  } while (0)
1490
/*
 * SKIPL(val): move the cursor of the current input val xmlChars forward,
 * one xmlChar at a time so that the position is kept accurate: a '\n'
 * increments the line and resets the column to 1, any other xmlChar
 * increments the column. The character count is incremented for each step.
 * After the move:
 *  - a '%' under the cursor is handed to xmlParserHandlePEReference();
 *  - if the cursor is on a 0 and the input cannot be grown, the input is
 *    popped with xmlPopInput().
 *
 * val is parenthesized in the loop test so that an expression argument
 * containing an operator of lower precedence than '<' is compared as a
 * whole. It is expanded at each iteration; it must not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1505
Daniel Veillarda880b122003-04-21 21:36:41 +00001506#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001507 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1508 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001509 xmlSHRINK (ctxt);
1510
1511static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1512 xmlParserInputShrink(ctxt->input);
1513 if ((*ctxt->input->cur == 0) &&
1514 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1515 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001516 }
Owen Taylor3473f882001-02-23 17:55:21 +00001517
Daniel Veillarda880b122003-04-21 21:36:41 +00001518#define GROW if ((ctxt->progressive == 0) && \
1519 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001520 xmlGROW (ctxt);
1521
1522static void xmlGROW (xmlParserCtxtPtr ctxt) {
1523 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1524 if ((*ctxt->input->cur == 0) &&
1525 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1526 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001527}
Owen Taylor3473f882001-02-23 17:55:21 +00001528
/* SKIP_BLANKS: skip the blanks at the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: advance to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor by exactly one xmlChar, incrementing the column
 * and the character count, then try to grow the input if the cursor reached
 * a 0. There is no newline accounting and no '%' handling here.
 * The expansion is a brace-enclosed block.
 */
#define NEXT1 {								\
	ctxt->input->cur += 1;						\
	ctxt->input->col += 1;						\
	ctxt->nbChars += 1;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
1540
/*
 * NEXTL(l): step over the character under the cursor, which is l xmlChars
 * long. The position is updated from the first xmlChar of that character:
 * a '\n' increments the line and resets the column to 1, anything else
 * increments the column by one. A '%' found at the new cursor position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
	ctxt->input->col++;						\
    } else {								\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    }									\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%')					\
	xmlParserHandlePEReference(ctxt);				\
  } while (0)
1548
/*
 * CUR_CHAR(l): current character of the parser context, obtained from
 * xmlCurrentChar(); l is an lvalue receiving the length of that character.
 * CUR_SCHAR(s, l): same, but for the string s, through
 * xmlStringCurrentChar().
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l, b, i, v): append the character v to the buffer b at index i
 * and advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is written by xmlCopyCharMultiByte() and i is advanced by
 * the value that function returns.
 *
 * All the parameters are parenthesized in the expansion so that expression
 * arguments are cast, indexed and compared as a whole. i must be an lvalue.
 * The expansion is an unbraced if/else statement without final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001555
1556/**
1557 * xmlSkipBlankChars:
1558 * @ctxt: the XML parser context
1559 *
1560 * skip all blanks character found at that point in the input streams.
1561 * It pops up finished entities in the process if allowable at that point.
1562 *
1563 * Returns the number of space chars skipped
1564 */
1565
1566int
1567xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001568 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001569
1570 /*
1571 * It's Okay to use CUR/NEXT here since all the blanks are on
1572 * the ASCII range.
1573 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001574 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1575 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001576 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001577 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001578 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001579 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001580 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001581 if (*cur == '\n') {
1582 ctxt->input->line++; ctxt->input->col = 1;
1583 }
1584 cur++;
1585 res++;
1586 if (*cur == 0) {
1587 ctxt->input->cur = cur;
1588 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1589 cur = ctxt->input->cur;
1590 }
1591 }
1592 ctxt->input->cur = cur;
1593 } else {
1594 int cur;
1595 do {
1596 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001597 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001598 NEXT;
1599 cur = CUR;
1600 res++;
1601 }
1602 while ((cur == 0) && (ctxt->inputNr > 1) &&
1603 (ctxt->instate != XML_PARSER_COMMENT)) {
1604 xmlPopInput(ctxt);
1605 cur = CUR;
1606 }
1607 /*
1608 * Need to handle support of entities branching here
1609 */
1610 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1611 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1612 }
Owen Taylor3473f882001-02-23 17:55:21 +00001613 return(res);
1614}
1615
1616/************************************************************************
1617 * *
1618 * Commodity functions to handle entities *
1619 * *
1620 ************************************************************************/
1621
1622/**
1623 * xmlPopInput:
1624 * @ctxt: an XML parser context
1625 *
1626 * xmlPopInput: the current input pointed by ctxt->input came to an end
1627 * pop it and return the next char.
1628 *
1629 * Returns the current xmlChar in the parser context
1630 */
1631xmlChar
1632xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001633 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001634 if (xmlParserDebugEntities)
1635 xmlGenericError(xmlGenericErrorContext,
1636 "Popping input %d\n", ctxt->inputNr);
1637 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001638 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001639 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1640 return(xmlPopInput(ctxt));
1641 return(CUR);
1642}
1643
1644/**
1645 * xmlPushInput:
1646 * @ctxt: an XML parser context
1647 * @input: an XML parser input fragment (entity, XML fragment ...).
1648 *
1649 * xmlPushInput: switch to a new input stream which is stacked on top
1650 * of the previous one(s).
1651 */
1652void
1653xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1654 if (input == NULL) return;
1655
1656 if (xmlParserDebugEntities) {
1657 if ((ctxt->input != NULL) && (ctxt->input->filename))
1658 xmlGenericError(xmlGenericErrorContext,
1659 "%s(%d): ", ctxt->input->filename,
1660 ctxt->input->line);
1661 xmlGenericError(xmlGenericErrorContext,
1662 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1663 }
1664 inputPush(ctxt, input);
1665 GROW;
1666}
1667
1668/**
1669 * xmlParseCharRef:
1670 * @ctxt: an XML parser context
1671 *
1672 * parse Reference declarations
1673 *
1674 * [66] CharRef ::= '&#' [0-9]+ ';' |
1675 * '&#x' [0-9a-fA-F]+ ';'
1676 *
1677 * [ WFC: Legal Character ]
1678 * Characters referred to using character references must match the
1679 * production for Char.
1680 *
1681 * Returns the value parsed (as an int), 0 in case of error
1682 */
1683int
1684xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001685 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001686 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001687 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001688
Owen Taylor3473f882001-02-23 17:55:21 +00001689 /*
1690 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1691 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001692 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001693 (NXT(2) == 'x')) {
1694 SKIP(3);
1695 GROW;
1696 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001697 if (count++ > 20) {
1698 count = 0;
1699 GROW;
1700 }
1701 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001702 val = val * 16 + (CUR - '0');
1703 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1704 val = val * 16 + (CUR - 'a') + 10;
1705 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1706 val = val * 16 + (CUR - 'A') + 10;
1707 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001709 val = 0;
1710 break;
1711 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001712 if (val > 0x10FFFF)
1713 outofrange = val;
1714
Owen Taylor3473f882001-02-23 17:55:21 +00001715 NEXT;
1716 count++;
1717 }
1718 if (RAW == ';') {
1719 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001720 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 ctxt->nbChars ++;
1722 ctxt->input->cur++;
1723 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001724 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001725 SKIP(2);
1726 GROW;
1727 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001728 if (count++ > 20) {
1729 count = 0;
1730 GROW;
1731 }
1732 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001733 val = val * 10 + (CUR - '0');
1734 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001735 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001736 val = 0;
1737 break;
1738 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001739 if (val > 0x10FFFF)
1740 outofrange = val;
1741
Owen Taylor3473f882001-02-23 17:55:21 +00001742 NEXT;
1743 count++;
1744 }
1745 if (RAW == ';') {
1746 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001747 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ctxt->nbChars ++;
1749 ctxt->input->cur++;
1750 }
1751 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001752 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001753 }
1754
1755 /*
1756 * [ WFC: Legal Character ]
1757 * Characters referred to using character references must match the
1758 * production for Char.
1759 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001760 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return(val);
1762 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001763 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1764 "xmlParseCharRef: invalid xmlChar value %d\n",
1765 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001766 }
1767 return(0);
1768}
1769
1770/**
1771 * xmlParseStringCharRef:
1772 * @ctxt: an XML parser context
1773 * @str: a pointer to an index in the string
1774 *
1775 * parse Reference declarations, variant parsing from a string rather
1776 * than an an input flow.
1777 *
1778 * [66] CharRef ::= '&#' [0-9]+ ';' |
1779 * '&#x' [0-9a-fA-F]+ ';'
1780 *
1781 * [ WFC: Legal Character ]
1782 * Characters referred to using character references must match the
1783 * production for Char.
1784 *
1785 * Returns the value parsed (as an int), 0 in case of error, str will be
1786 * updated to the current value of the index
1787 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001788static int
Owen Taylor3473f882001-02-23 17:55:21 +00001789xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1790 const xmlChar *ptr;
1791 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001792 unsigned int val = 0;
1793 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001794
1795 if ((str == NULL) || (*str == NULL)) return(0);
1796 ptr = *str;
1797 cur = *ptr;
1798 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1799 ptr += 3;
1800 cur = *ptr;
1801 while (cur != ';') { /* Non input consuming loop */
1802 if ((cur >= '0') && (cur <= '9'))
1803 val = val * 16 + (cur - '0');
1804 else if ((cur >= 'a') && (cur <= 'f'))
1805 val = val * 16 + (cur - 'a') + 10;
1806 else if ((cur >= 'A') && (cur <= 'F'))
1807 val = val * 16 + (cur - 'A') + 10;
1808 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001809 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001810 val = 0;
1811 break;
1812 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001813 if (val > 0x10FFFF)
1814 outofrange = val;
1815
Owen Taylor3473f882001-02-23 17:55:21 +00001816 ptr++;
1817 cur = *ptr;
1818 }
1819 if (cur == ';')
1820 ptr++;
1821 } else if ((cur == '&') && (ptr[1] == '#')){
1822 ptr += 2;
1823 cur = *ptr;
1824 while (cur != ';') { /* Non input consuming loops */
1825 if ((cur >= '0') && (cur <= '9'))
1826 val = val * 10 + (cur - '0');
1827 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001828 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001829 val = 0;
1830 break;
1831 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001832 if (val > 0x10FFFF)
1833 outofrange = val;
1834
Owen Taylor3473f882001-02-23 17:55:21 +00001835 ptr++;
1836 cur = *ptr;
1837 }
1838 if (cur == ';')
1839 ptr++;
1840 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001841 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 return(0);
1843 }
1844 *str = ptr;
1845
1846 /*
1847 * [ WFC: Legal Character ]
1848 * Characters referred to using character references must match the
1849 * production for Char.
1850 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001851 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001852 return(val);
1853 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001854 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1855 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1856 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001857 }
1858 return(0);
1859}
1860
1861/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001862 * xmlNewBlanksWrapperInputStream:
1863 * @ctxt: an XML parser context
1864 * @entity: an Entity pointer
1865 *
1866 * Create a new input stream for wrapping
1867 * blanks around a PEReference
1868 *
1869 * Returns the new input stream or NULL
1870 */
1871
1872static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1873
Daniel Veillardf4862f02002-09-10 11:13:43 +00001874static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001875xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1876 xmlParserInputPtr input;
1877 xmlChar *buffer;
1878 size_t length;
1879 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001880 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1881 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001882 return(NULL);
1883 }
1884 if (xmlParserDebugEntities)
1885 xmlGenericError(xmlGenericErrorContext,
1886 "new blanks wrapper for entity: %s\n", entity->name);
1887 input = xmlNewInputStream(ctxt);
1888 if (input == NULL) {
1889 return(NULL);
1890 }
1891 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001892 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001893 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001895 return(NULL);
1896 }
1897 buffer [0] = ' ';
1898 buffer [1] = '%';
1899 buffer [length-3] = ';';
1900 buffer [length-2] = ' ';
1901 buffer [length-1] = 0;
1902 memcpy(buffer + 2, entity->name, length - 5);
1903 input->free = deallocblankswrapper;
1904 input->base = buffer;
1905 input->cur = buffer;
1906 input->length = length;
1907 input->end = &buffer[length];
1908 return(input);
1909}
1910
1911/**
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * xmlParserHandlePEReference:
1913 * @ctxt: the parser context
1914 *
1915 * [69] PEReference ::= '%' Name ';'
1916 *
1917 * [ WFC: No Recursion ]
1918 * A parsed entity must not contain a recursive
1919 * reference to itself, either directly or indirectly.
1920 *
1921 * [ WFC: Entity Declared ]
1922 * In a document without any DTD, a document with only an internal DTD
1923 * subset which contains no parameter entity references, or a document
1924 * with "standalone='yes'", ... ... The declaration of a parameter
1925 * entity must precede any reference to it...
1926 *
1927 * [ VC: Entity Declared ]
1928 * In a document with an external subset or external parameter entities
1929 * with "standalone='no'", ... ... The declaration of a parameter entity
1930 * must precede any reference to it...
1931 *
1932 * [ WFC: In DTD ]
1933 * Parameter-entity references may only appear in the DTD.
1934 * NOTE: misleading but this is handled.
1935 *
1936 * A PEReference may have been detected in the current input stream
1937 * the handling is done accordingly to
1938 * http://www.w3.org/TR/REC-xml#entproc
1939 * i.e.
1940 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001941 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001942 */
1943void
1944xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001945 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001946 xmlEntityPtr entity = NULL;
1947 xmlParserInputPtr input;
1948
Owen Taylor3473f882001-02-23 17:55:21 +00001949 if (RAW != '%') return;
1950 switch(ctxt->instate) {
1951 case XML_PARSER_CDATA_SECTION:
1952 return;
1953 case XML_PARSER_COMMENT:
1954 return;
1955 case XML_PARSER_START_TAG:
1956 return;
1957 case XML_PARSER_END_TAG:
1958 return;
1959 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001961 return;
1962 case XML_PARSER_PROLOG:
1963 case XML_PARSER_START:
1964 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001965 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return;
1967 case XML_PARSER_ENTITY_DECL:
1968 case XML_PARSER_CONTENT:
1969 case XML_PARSER_ATTRIBUTE_VALUE:
1970 case XML_PARSER_PI:
1971 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001972 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001973 /* we just ignore it there */
1974 return;
1975 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001976 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 return;
1978 case XML_PARSER_ENTITY_VALUE:
1979 /*
1980 * NOTE: in the case of entity values, we don't do the
1981 * substitution here since we need the literal
1982 * entity value to be able to save the internal
1983 * subset of the document.
1984 * This will be handled by xmlStringDecodeEntities
1985 */
1986 return;
1987 case XML_PARSER_DTD:
1988 /*
1989 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1990 * In the internal DTD subset, parameter-entity references
1991 * can occur only where markup declarations can occur, not
1992 * within markup declarations.
1993 * In that case this is handled in xmlParseMarkupDecl
1994 */
1995 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1996 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001997 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001998 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001999 break;
2000 case XML_PARSER_IGNORE:
2001 return;
2002 }
2003
2004 NEXT;
2005 name = xmlParseName(ctxt);
2006 if (xmlParserDebugEntities)
2007 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002008 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002009 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002010 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002011 } else {
2012 if (RAW == ';') {
2013 NEXT;
2014 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2015 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2016 if (entity == NULL) {
2017
2018 /*
2019 * [ WFC: Entity Declared ]
2020 * In a document without any DTD, a document with only an
2021 * internal DTD subset which contains no parameter entity
2022 * references, or a document with "standalone='yes'", ...
2023 * ... The declaration of a parameter entity must precede
2024 * any reference to it...
2025 */
2026 if ((ctxt->standalone == 1) ||
2027 ((ctxt->hasExternalSubset == 0) &&
2028 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002029 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002030 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002031 } else {
2032 /*
2033 * [ VC: Entity Declared ]
2034 * In a document with an external subset or external
2035 * parameter entities with "standalone='no'", ...
2036 * ... The declaration of a parameter entity must precede
2037 * any reference to it...
2038 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002039 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2040 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2041 "PEReference: %%%s; not found\n",
2042 name);
2043 } else
2044 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2045 "PEReference: %%%s; not found\n",
2046 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002047 ctxt->valid = 0;
2048 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002049 } else if (ctxt->input->free != deallocblankswrapper) {
2050 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
2051 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002052 } else {
2053 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2054 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002055 xmlChar start[4];
2056 xmlCharEncoding enc;
2057
Owen Taylor3473f882001-02-23 17:55:21 +00002058 /*
2059 * handle the extra spaces added before and after
2060 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002061 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
2063 input = xmlNewEntityInputStream(ctxt, entity);
2064 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00002065
2066 /*
2067 * Get the 4 first bytes and decode the charset
2068 * if enc != XML_CHAR_ENCODING_NONE
2069 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002070 * Note that, since we may have some non-UTF8
2071 * encoding (like UTF16, bug 135229), the 'length'
2072 * is not known, but we can calculate based upon
2073 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002074 */
2075 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002076 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002077 start[0] = RAW;
2078 start[1] = NXT(1);
2079 start[2] = NXT(2);
2080 start[3] = NXT(3);
2081 enc = xmlDetectCharEncoding(start, 4);
2082 if (enc != XML_CHAR_ENCODING_NONE) {
2083 xmlSwitchEncoding(ctxt, enc);
2084 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002085 }
2086
Owen Taylor3473f882001-02-23 17:55:21 +00002087 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002088 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2089 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 xmlParseTextDecl(ctxt);
2091 }
Owen Taylor3473f882001-02-23 17:55:21 +00002092 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002093 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2094 "PEReference: %s is not a parameter entity\n",
2095 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 }
2098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002099 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002100 }
Owen Taylor3473f882001-02-23 17:55:21 +00002101 }
2102}
2103
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the variable named <buffer>_size and
 * reallocates buffer to that many xmlChars. If the reallocation fails,
 * control jumps to the mem_error label, which the function using the macro
 * must provide; buffer is left pointing to the original block in that case.
 * buffer must be a plain identifier since it is pasted to build the name
 * of the size variable. The expansion is a brace-enclosed block.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    buffer##_size *= 2;							\
    growBuffer_tmp = (xmlChar *)					\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL)						\
	goto mem_error;							\
    buffer = growBuffer_tmp;						\
}
2115
2116/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002117 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002118 * @ctxt: the parser context
2119 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002120 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002121 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2122 * @end: an end marker xmlChar, 0 if none
2123 * @end2: an end marker xmlChar, 0 if none
2124 * @end3: an end marker xmlChar, 0 if none
2125 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002126 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002127 *
2128 * [67] Reference ::= EntityRef | CharRef
2129 *
2130 * [69] PEReference ::= '%' Name ';'
2131 *
2132 * Returns A newly allocated string with the substitution done. The caller
2133 * must deallocate it !
2134 */
2135xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002136xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2137 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002138 xmlChar *buffer = NULL;
2139 int buffer_size = 0;
2140
2141 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002142 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002143 xmlEntityPtr ent;
2144 int c,l;
2145 int nbchars = 0;
2146
Daniel Veillarda82b1822004-11-08 16:24:57 +00002147 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002148 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002149 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002150
2151 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002152 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 return(NULL);
2154 }
2155
2156 /*
2157 * allocate a translation buffer.
2158 */
2159 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002160 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002161 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002162
2163 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002164 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002165 * we are operating on already parsed values.
2166 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002167 if (str < last)
2168 c = CUR_SCHAR(str, l);
2169 else
2170 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002171 while ((c != 0) && (c != end) && /* non input consuming loop */
2172 (c != end2) && (c != end3)) {
2173
2174 if (c == 0) break;
2175 if ((c == '&') && (str[1] == '#')) {
2176 int val = xmlParseStringCharRef(ctxt, &str);
2177 if (val != 0) {
2178 COPY_BUF(0,buffer,nbchars,val);
2179 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002180 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2181 growBuffer(buffer);
2182 }
Owen Taylor3473f882001-02-23 17:55:21 +00002183 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2184 if (xmlParserDebugEntities)
2185 xmlGenericError(xmlGenericErrorContext,
2186 "String decoding Entity Reference: %.30s\n",
2187 str);
2188 ent = xmlParseStringEntityRef(ctxt, &str);
2189 if ((ent != NULL) &&
2190 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2191 if (ent->content != NULL) {
2192 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002193 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2194 growBuffer(buffer);
2195 }
Owen Taylor3473f882001-02-23 17:55:21 +00002196 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002197 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2198 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002199 }
2200 } else if ((ent != NULL) && (ent->content != NULL)) {
2201 xmlChar *rep;
2202
2203 ctxt->depth++;
2204 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2205 0, 0, 0);
2206 ctxt->depth--;
2207 if (rep != NULL) {
2208 current = rep;
2209 while (*current != 0) { /* non input consuming loop */
2210 buffer[nbchars++] = *current++;
2211 if (nbchars >
2212 buffer_size - XML_PARSER_BUFFER_SIZE) {
2213 growBuffer(buffer);
2214 }
2215 }
2216 xmlFree(rep);
2217 }
2218 } else if (ent != NULL) {
2219 int i = xmlStrlen(ent->name);
2220 const xmlChar *cur = ent->name;
2221
2222 buffer[nbchars++] = '&';
2223 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2224 growBuffer(buffer);
2225 }
2226 for (;i > 0;i--)
2227 buffer[nbchars++] = *cur++;
2228 buffer[nbchars++] = ';';
2229 }
2230 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2231 if (xmlParserDebugEntities)
2232 xmlGenericError(xmlGenericErrorContext,
2233 "String decoding PE Reference: %.30s\n", str);
2234 ent = xmlParseStringPEReference(ctxt, &str);
2235 if (ent != NULL) {
2236 xmlChar *rep;
2237
2238 ctxt->depth++;
2239 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2240 0, 0, 0);
2241 ctxt->depth--;
2242 if (rep != NULL) {
2243 current = rep;
2244 while (*current != 0) { /* non input consuming loop */
2245 buffer[nbchars++] = *current++;
2246 if (nbchars >
2247 buffer_size - XML_PARSER_BUFFER_SIZE) {
2248 growBuffer(buffer);
2249 }
2250 }
2251 xmlFree(rep);
2252 }
2253 }
2254 } else {
2255 COPY_BUF(l,buffer,nbchars,c);
2256 str += l;
2257 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2258 growBuffer(buffer);
2259 }
2260 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002261 if (str < last)
2262 c = CUR_SCHAR(str, l);
2263 else
2264 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002265 }
2266 buffer[nbchars++] = 0;
2267 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002268
2269mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272}
2273
Daniel Veillarde57ec792003-09-10 10:50:59 +00002274/**
2275 * xmlStringDecodeEntities:
2276 * @ctxt: the parser context
2277 * @str: the input string
2278 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2279 * @end: an end marker xmlChar, 0 if none
2280 * @end2: an end marker xmlChar, 0 if none
2281 * @end3: an end marker xmlChar, 0 if none
2282 *
2283 * Takes a entity string content and process to do the adequate substitutions.
2284 *
2285 * [67] Reference ::= EntityRef | CharRef
2286 *
2287 * [69] PEReference ::= '%' Name ';'
2288 *
2289 * Returns A newly allocated string with the substitution done. The caller
2290 * must deallocate it !
2291 */
2292xmlChar *
2293xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2294 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002295 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002296 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2297 end, end2, end3));
2298}
Owen Taylor3473f882001-02-23 17:55:21 +00002299
2300/************************************************************************
2301 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002302 * Commodity functions, cleanup needed ? *
2303 * *
2304 ************************************************************************/
2305
2306/**
2307 * areBlanks:
2308 * @ctxt: an XML parser context
2309 * @str: a xmlChar *
2310 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002311 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002312 *
2313 * Is this a sequence of blank chars that one can ignore ?
2314 *
2315 * Returns 1 if ignorable 0 otherwise.
2316 */
2317
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002318static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2319 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002320 int i, ret;
2321 xmlNodePtr lastChild;
2322
Daniel Veillard05c13a22001-09-09 08:38:09 +00002323 /*
2324 * Don't spend time trying to differentiate them, the same callback is
2325 * used !
2326 */
2327 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002328 return(0);
2329
Owen Taylor3473f882001-02-23 17:55:21 +00002330 /*
2331 * Check for xml:space value.
2332 */
2333 if (*(ctxt->space) == 1)
2334 return(0);
2335
2336 /*
2337 * Check that the string is made of blanks
2338 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002339 if (blank_chars == 0) {
2340 for (i = 0;i < len;i++)
2341 if (!(IS_BLANK_CH(str[i]))) return(0);
2342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343
2344 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002345 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002346 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002347 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 if (ctxt->myDoc != NULL) {
2349 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2350 if (ret == 0) return(1);
2351 if (ret == 1) return(0);
2352 }
2353
2354 /*
2355 * Otherwise, heuristic :-\
2356 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002357 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002358 if ((ctxt->node->children == NULL) &&
2359 (RAW == '<') && (NXT(1) == '/')) return(0);
2360
2361 lastChild = xmlGetLastChild(ctxt->node);
2362 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002363 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2364 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else if (xmlNodeIsText(lastChild))
2366 return(0);
2367 else if ((ctxt->node->children != NULL) &&
2368 (xmlNodeIsText(ctxt->node->children)))
2369 return(0);
2370 return(1);
2371}
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373/************************************************************************
2374 * *
2375 * Extra stuff for namespace support *
2376 * Relates to http://www.w3.org/TR/WD-xml-names *
2377 * *
2378 ************************************************************************/
2379
2380/**
2381 * xmlSplitQName:
2382 * @ctxt: an XML parser context
2383 * @name: an XML parser context
2384 * @prefix: a xmlChar **
2385 *
2386 * parse an UTF8 encoded XML qualified name string
2387 *
2388 * [NS 5] QName ::= (Prefix ':')? LocalPart
2389 *
2390 * [NS 6] Prefix ::= NCName
2391 *
2392 * [NS 7] LocalPart ::= NCName
2393 *
2394 * Returns the local part, and prefix is updated
2395 * to get the Prefix if any.
2396 */
2397
2398xmlChar *
2399xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2400 xmlChar buf[XML_MAX_NAMELEN + 5];
2401 xmlChar *buffer = NULL;
2402 int len = 0;
2403 int max = XML_MAX_NAMELEN;
2404 xmlChar *ret = NULL;
2405 const xmlChar *cur = name;
2406 int c;
2407
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002408 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002409 *prefix = NULL;
2410
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002411 if (cur == NULL) return(NULL);
2412
Owen Taylor3473f882001-02-23 17:55:21 +00002413#ifndef XML_XML_NAMESPACE
2414 /* xml: prefix is not really a namespace */
2415 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2416 (cur[2] == 'l') && (cur[3] == ':'))
2417 return(xmlStrdup(name));
2418#endif
2419
Daniel Veillard597bc482003-07-24 16:08:28 +00002420 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if (cur[0] == ':')
2422 return(xmlStrdup(name));
2423
2424 c = *cur++;
2425 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2426 buf[len++] = c;
2427 c = *cur++;
2428 }
2429 if (len >= max) {
2430 /*
2431 * Okay someone managed to make a huge name, so he's ready to pay
2432 * for the processing speed.
2433 */
2434 max = len * 2;
2435
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002436 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002439 return(NULL);
2440 }
2441 memcpy(buffer, buf, len);
2442 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2443 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002444 xmlChar *tmp;
2445
Owen Taylor3473f882001-02-23 17:55:21 +00002446 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002447 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002448 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002449 if (tmp == NULL) {
2450 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002451 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002452 return(NULL);
2453 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002454 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002455 }
2456 buffer[len++] = c;
2457 c = *cur++;
2458 }
2459 buffer[len] = 0;
2460 }
2461
Daniel Veillard597bc482003-07-24 16:08:28 +00002462 /* nasty but well=formed
2463 if ((c == ':') && (*cur == 0)) {
2464 return(xmlStrdup(name));
2465 } */
2466
Owen Taylor3473f882001-02-23 17:55:21 +00002467 if (buffer == NULL)
2468 ret = xmlStrndup(buf, len);
2469 else {
2470 ret = buffer;
2471 buffer = NULL;
2472 max = XML_MAX_NAMELEN;
2473 }
2474
2475
2476 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002477 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002479 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002480 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002481 }
Owen Taylor3473f882001-02-23 17:55:21 +00002482 len = 0;
2483
Daniel Veillardbb284f42002-10-16 18:02:47 +00002484 /*
2485 * Check that the first character is proper to start
2486 * a new name
2487 */
2488 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2489 ((c >= 0x41) && (c <= 0x5A)) ||
2490 (c == '_') || (c == ':'))) {
2491 int l;
2492 int first = CUR_SCHAR(cur, l);
2493
2494 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002495 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002496 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002497 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002498 }
2499 }
2500 cur++;
2501
Owen Taylor3473f882001-02-23 17:55:21 +00002502 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2503 buf[len++] = c;
2504 c = *cur++;
2505 }
2506 if (len >= max) {
2507 /*
2508 * Okay someone managed to make a huge name, so he's ready to pay
2509 * for the processing speed.
2510 */
2511 max = len * 2;
2512
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002513 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002514 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(NULL);
2517 }
2518 memcpy(buffer, buf, len);
2519 while (c != 0) { /* tested bigname2.xml */
2520 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002521 xmlChar *tmp;
2522
Owen Taylor3473f882001-02-23 17:55:21 +00002523 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002524 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002525 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002526 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002527 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002528 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return(NULL);
2530 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002531 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 }
2533 buffer[len++] = c;
2534 c = *cur++;
2535 }
2536 buffer[len] = 0;
2537 }
2538
2539 if (buffer == NULL)
2540 ret = xmlStrndup(buf, len);
2541 else {
2542 ret = buffer;
2543 }
2544 }
2545
2546 return(ret);
2547}
2548
2549/************************************************************************
2550 * *
2551 * The parser itself *
2552 * Relates to http://www.w3.org/TR/REC-xml *
2553 * *
2554 ************************************************************************/
2555
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002556static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002557static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002558 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002559
Owen Taylor3473f882001-02-23 17:55:21 +00002560/**
2561 * xmlParseName:
2562 * @ctxt: an XML parser context
2563 *
2564 * parse an XML name.
2565 *
2566 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2567 * CombiningChar | Extender
2568 *
2569 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2570 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002571 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002572 *
2573 * Returns the Name parsed or NULL
2574 */
2575
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002576const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002577xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002578 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002579 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002580 int count = 0;
2581
2582 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583
2584 /*
2585 * Accelerator for simple ASCII names
2586 */
2587 in = ctxt->input->cur;
2588 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2589 ((*in >= 0x41) && (*in <= 0x5A)) ||
2590 (*in == '_') || (*in == ':')) {
2591 in++;
2592 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2593 ((*in >= 0x41) && (*in <= 0x5A)) ||
2594 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002595 (*in == '_') || (*in == '-') ||
2596 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002597 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002598 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002601 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002602 ctxt->nbChars += count;
2603 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002604 if (ret == NULL)
2605 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002606 return(ret);
2607 }
2608 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002609 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002610}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002611
Daniel Veillard46de64e2002-05-29 08:21:33 +00002612/**
2613 * xmlParseNameAndCompare:
2614 * @ctxt: an XML parser context
2615 *
2616 * parse an XML name and compares for match
2617 * (specialized for endtag parsing)
2618 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002619 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2620 * and the name for mismatch
2621 */
2622
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002623static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002624xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002625 register const xmlChar *cmp = other;
2626 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002627 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002628
2629 GROW;
2630
2631 in = ctxt->input->cur;
2632 while (*in != 0 && *in == *cmp) {
2633 ++in;
2634 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002635 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002636 }
William M. Brack76e95df2003-10-18 16:20:14 +00002637 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002638 /* success */
2639 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002640 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002641 }
2642 /* failure (or end of input buffer), check with full function */
2643 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002644 /* strings coming from the dictionnary direct compare possible */
2645 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002646 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002647 }
2648 return ret;
2649}
2650
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002651static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002652xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002653 int len = 0, l;
2654 int c;
2655 int count = 0;
2656
2657 /*
2658 * Handler for more complex cases
2659 */
2660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002661 c = CUR_CHAR(l);
2662 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2663 (!IS_LETTER(c) && (c != '_') &&
2664 (c != ':'))) {
2665 return(NULL);
2666 }
2667
2668 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002669 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002670 (c == '.') || (c == '-') ||
2671 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002672 (IS_COMBINING(c)) ||
2673 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002674 if (count++ > 100) {
2675 count = 0;
2676 GROW;
2677 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002678 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002679 NEXTL(l);
2680 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
Daniel Veillard96688262005-08-23 18:14:12 +00002682 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
2683 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002684 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002685}
2686
2687/**
2688 * xmlParseStringName:
2689 * @ctxt: an XML parser context
2690 * @str: a pointer to the string pointer (IN/OUT)
2691 *
2692 * parse an XML name.
2693 *
2694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2695 * CombiningChar | Extender
2696 *
2697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2698 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002699 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002700 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002701 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002702 * is updated to the current location in the string.
2703 */
2704
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002705static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002706xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2707 xmlChar buf[XML_MAX_NAMELEN + 5];
2708 const xmlChar *cur = *str;
2709 int len = 0, l;
2710 int c;
2711
2712 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002713 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002714 (c != ':')) {
2715 return(NULL);
2716 }
2717
William M. Brack871611b2003-10-18 04:53:14 +00002718 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002719 (c == '.') || (c == '-') ||
2720 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002721 (IS_COMBINING(c)) ||
2722 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002723 COPY_BUF(l,buf,len,c);
2724 cur += l;
2725 c = CUR_SCHAR(cur, l);
2726 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2727 /*
2728 * Okay someone managed to make a huge name, so he's ready to pay
2729 * for the processing speed.
2730 */
2731 xmlChar *buffer;
2732 int max = len * 2;
2733
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002734 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002735 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002737 return(NULL);
2738 }
2739 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002741 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002742 (c == '.') || (c == '-') ||
2743 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002744 (IS_COMBINING(c)) ||
2745 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002747 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002749 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002750 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002753 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
2755 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002756 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 COPY_BUF(l,buffer,len,c);
2759 cur += l;
2760 c = CUR_SCHAR(cur, l);
2761 }
2762 buffer[len] = 0;
2763 *str = cur;
2764 return(buffer);
2765 }
2766 }
2767 *str = cur;
2768 return(xmlStrndup(buf, len));
2769}
2770
2771/**
2772 * xmlParseNmtoken:
2773 * @ctxt: an XML parser context
2774 *
2775 * parse an XML Nmtoken.
2776 *
2777 * [7] Nmtoken ::= (NameChar)+
2778 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002779 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002780 *
2781 * Returns the Nmtoken parsed or NULL
2782 */
2783
2784xmlChar *
2785xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2786 xmlChar buf[XML_MAX_NAMELEN + 5];
2787 int len = 0, l;
2788 int c;
2789 int count = 0;
2790
2791 GROW;
2792 c = CUR_CHAR(l);
2793
William M. Brack871611b2003-10-18 04:53:14 +00002794 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002795 (c == '.') || (c == '-') ||
2796 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002797 (IS_COMBINING(c)) ||
2798 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002799 if (count++ > 100) {
2800 count = 0;
2801 GROW;
2802 }
2803 COPY_BUF(l,buf,len,c);
2804 NEXTL(l);
2805 c = CUR_CHAR(l);
2806 if (len >= XML_MAX_NAMELEN) {
2807 /*
2808 * Okay someone managed to make a huge token, so he's ready to pay
2809 * for the processing speed.
2810 */
2811 xmlChar *buffer;
2812 int max = len * 2;
2813
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002814 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002817 return(NULL);
2818 }
2819 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002821 (c == '.') || (c == '-') ||
2822 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002823 (IS_COMBINING(c)) ||
2824 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002825 if (count++ > 100) {
2826 count = 0;
2827 GROW;
2828 }
2829 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002830 xmlChar *tmp;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002838 return(NULL);
2839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 }
2842 COPY_BUF(l,buffer,len,c);
2843 NEXTL(l);
2844 c = CUR_CHAR(l);
2845 }
2846 buffer[len] = 0;
2847 return(buffer);
2848 }
2849 }
2850 if (len == 0)
2851 return(NULL);
2852 return(xmlStrndup(buf, len));
2853}
2854
2855/**
2856 * xmlParseEntityValue:
2857 * @ctxt: an XML parser context
2858 * @orig: if non-NULL store a copy of the original entity value
2859 *
2860 * parse a value for ENTITY declarations
2861 *
2862 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2863 * "'" ([^%&'] | PEReference | Reference)* "'"
2864 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002865 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002866 */
2867
2868xmlChar *
2869xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2870 xmlChar *buf = NULL;
2871 int len = 0;
2872 int size = XML_PARSER_BUFFER_SIZE;
2873 int c, l;
2874 xmlChar stop;
2875 xmlChar *ret = NULL;
2876 const xmlChar *cur = NULL;
2877 xmlParserInputPtr input;
2878
2879 if (RAW == '"') stop = '"';
2880 else if (RAW == '\'') stop = '\'';
2881 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(NULL);
2884 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002885 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002887 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002888 return(NULL);
2889 }
2890
2891 /*
2892 * The content of the entity definition is copied in a buffer.
2893 */
2894
2895 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2896 input = ctxt->input;
2897 GROW;
2898 NEXT;
2899 c = CUR_CHAR(l);
2900 /*
2901 * NOTE: 4.4.5 Included in Literal
2902 * When a parameter entity reference appears in a literal entity
2903 * value, ... a single or double quote character in the replacement
2904 * text is always treated as a normal data character and will not
2905 * terminate the literal.
2906 * In practice it means we stop the loop only when back at parsing
2907 * the initial entity and the quote is found
2908 */
William M. Brack871611b2003-10-18 04:53:14 +00002909 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 (ctxt->input != input))) {
2911 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002912 xmlChar *tmp;
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002915 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2916 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002917 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002918 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002919 return(NULL);
2920 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002921 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 }
2923 COPY_BUF(l,buf,len,c);
2924 NEXTL(l);
2925 /*
2926 * Pop-up of finished entities.
2927 */
2928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2929 xmlPopInput(ctxt);
2930
2931 GROW;
2932 c = CUR_CHAR(l);
2933 if (c == 0) {
2934 GROW;
2935 c = CUR_CHAR(l);
2936 }
2937 }
2938 buf[len] = 0;
2939
2940 /*
2941 * Raise problem w.r.t. '&' and '%' being used in non-entities
2942 * reference constructs. Note Charref will be handled in
2943 * xmlStringDecodeEntities()
2944 */
2945 cur = buf;
2946 while (*cur != 0) { /* non input consuming */
2947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2948 xmlChar *name;
2949 xmlChar tmp = *cur;
2950
2951 cur++;
2952 name = xmlParseStringName(ctxt, &cur);
2953 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002954 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002955 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002956 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002958 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2959 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 }
2962 if (name != NULL)
2963 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002964 if (*cur == 0)
2965 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002966 }
2967 cur++;
2968 }
2969
2970 /*
2971 * Then PEReference entities are substituted.
2972 */
2973 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002974 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002975 xmlFree(buf);
2976 } else {
2977 NEXT;
2978 /*
2979 * NOTE: 4.4.7 Bypassed
2980 * When a general entity reference appears in the EntityValue in
2981 * an entity declaration, it is bypassed and left as is.
2982 * so XML_SUBSTITUTE_REF is not set here.
2983 */
2984 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2985 0, 0, 0);
2986 if (orig != NULL)
2987 *orig = buf;
2988 else
2989 xmlFree(buf);
2990 }
2991
2992 return(ret);
2993}
2994
2995/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002996 * xmlParseAttValueComplex:
2997 * @ctxt: an XML parser context
 * @attlen:  the resulting attribute len
 * @normalize:  whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003000 *
3001 * parse a value for an attribute, this is the fallback function
3002 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003003 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003008xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003009 xmlChar limit = 0;
3010 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003011 int len = 0;
3012 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003013 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003014 xmlChar *current = NULL;
3015 xmlEntityPtr ent;
3016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (NXT(0) == '"') {
3018 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3019 limit = '"';
3020 NEXT;
3021 } else if (NXT(0) == '\'') {
3022 limit = '\'';
3023 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3024 NEXT;
3025 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029
3030 /*
3031 * allocate a translation buffer.
3032 */
3033 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003034 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003035 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003036
3037 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003038 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003039 */
3040 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003041 while ((NXT(0) != limit) && /* checked */
3042 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003043 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003044 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003045 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 if (NXT(1) == '#') {
3047 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003048
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003050 if (ctxt->replaceEntities) {
3051 if (len > buf_size - 10) {
3052 growBuffer(buf);
3053 }
3054 buf[len++] = '&';
3055 } else {
3056 /*
3057 * The reparsing will be done in xmlStringGetNodeList()
3058 * called by the attribute() function in SAX.c
3059 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003060 if (len > buf_size - 10) {
3061 growBuffer(buf);
3062 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003063 buf[len++] = '&';
3064 buf[len++] = '#';
3065 buf[len++] = '3';
3066 buf[len++] = '8';
3067 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003070 if (len > buf_size - 10) {
3071 growBuffer(buf);
3072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len += xmlCopyChar(0, &buf[len], val);
3074 }
3075 } else {
3076 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003077 if ((ent != NULL) &&
3078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3079 if (len > buf_size - 10) {
3080 growBuffer(buf);
3081 }
3082 if ((ctxt->replaceEntities == 0) &&
3083 (ent->content[0] == '&')) {
3084 buf[len++] = '&';
3085 buf[len++] = '#';
3086 buf[len++] = '3';
3087 buf[len++] = '8';
3088 buf[len++] = ';';
3089 } else {
3090 buf[len++] = ent->content[0];
3091 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003092 } else if ((ent != NULL) &&
3093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003094 xmlChar *rep;
3095
3096 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3097 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003098 XML_SUBSTITUTE_REF,
3099 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (rep != NULL) {
3101 current = rep;
3102 while (*current != 0) { /* non input consuming */
3103 buf[len++] = *current++;
3104 if (len > buf_size - 10) {
3105 growBuffer(buf);
3106 }
3107 }
3108 xmlFree(rep);
3109 }
3110 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003111 if (len > buf_size - 10) {
3112 growBuffer(buf);
3113 }
Owen Taylor3473f882001-02-23 17:55:21 +00003114 if (ent->content != NULL)
3115 buf[len++] = ent->content[0];
3116 }
3117 } else if (ent != NULL) {
3118 int i = xmlStrlen(ent->name);
3119 const xmlChar *cur = ent->name;
3120
3121 /*
3122 * This may look absurd but is needed to detect
3123 * entities problems
3124 */
3125 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3126 (ent->content != NULL)) {
3127 xmlChar *rep;
3128 rep = xmlStringDecodeEntities(ctxt, ent->content,
3129 XML_SUBSTITUTE_REF, 0, 0, 0);
3130 if (rep != NULL)
3131 xmlFree(rep);
3132 }
3133
3134 /*
3135 * Just output the reference
3136 */
3137 buf[len++] = '&';
3138 if (len > buf_size - i - 10) {
3139 growBuffer(buf);
3140 }
3141 for (;i > 0;i--)
3142 buf[len++] = *cur++;
3143 buf[len++] = ';';
3144 }
3145 }
3146 } else {
3147 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003148 if ((len != 0) || (!normalize)) {
3149 if ((!normalize) || (!in_space)) {
3150 COPY_BUF(l,buf,len,0x20);
3151 if (len > buf_size - 10) {
3152 growBuffer(buf);
3153 }
3154 }
3155 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003156 }
3157 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003158 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003159 COPY_BUF(l,buf,len,c);
3160 if (len > buf_size - 10) {
3161 growBuffer(buf);
3162 }
3163 }
3164 NEXTL(l);
3165 }
3166 GROW;
3167 c = CUR_CHAR(l);
3168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003169 if ((in_space) && (normalize)) {
3170 while (buf[len - 1] == 0x20) len--;
3171 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003172 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003173 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003174 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003175 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003176 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3177 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003178 } else
3179 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003180 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003181 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003182
3183mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003185 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003186}
3187
3188/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003189 * xmlParseAttValue:
3190 * @ctxt: an XML parser context
3191 *
3192 * parse a value for an attribute
3193 * Note: the parser won't do substitution of entities here, this
3194 * will be handled later in xmlStringGetNodeList
3195 *
3196 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3197 * "'" ([^<&'] | Reference)* "'"
3198 *
3199 * 3.3.3 Attribute-Value Normalization:
3200 * Before the value of an attribute is passed to the application or
3201 * checked for validity, the XML processor must normalize it as follows:
3202 * - a character reference is processed by appending the referenced
3203 * character to the attribute value
3204 * - an entity reference is processed by recursively processing the
3205 * replacement text of the entity
3206 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3207 * appending #x20 to the normalized value, except that only a single
3208 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3209 * parsed entity or the literal entity value of an internal parsed entity
3210 * - other characters are processed by appending them to the normalized value
3211 * If the declared value is not CDATA, then the XML processor must further
3212 * process the normalized attribute value by discarding any leading and
3213 * trailing space (#x20) characters, and by replacing sequences of space
3214 * (#x20) characters by a single space (#x20) character.
3215 * All attributes for which no declaration has been read should be treated
3216 * by a non-validating parser as if declared CDATA.
3217 *
3218 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3219 */
3220
3221
3222xmlChar *
3223xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003224 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003225 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003226}
3227
3228/**
Owen Taylor3473f882001-02-23 17:55:21 +00003229 * xmlParseSystemLiteral:
3230 * @ctxt: an XML parser context
3231 *
3232 * parse an XML Literal
3233 *
3234 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3235 *
3236 * Returns the SystemLiteral parsed or NULL
3237 */
3238
3239xmlChar *
3240xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3241 xmlChar *buf = NULL;
3242 int len = 0;
3243 int size = XML_PARSER_BUFFER_SIZE;
3244 int cur, l;
3245 xmlChar stop;
3246 int state = ctxt->instate;
3247 int count = 0;
3248
3249 SHRINK;
3250 if (RAW == '"') {
3251 NEXT;
3252 stop = '"';
3253 } else if (RAW == '\'') {
3254 NEXT;
3255 stop = '\'';
3256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003258 return(NULL);
3259 }
3260
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003261 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003262 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003264 return(NULL);
3265 }
3266 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3267 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003268 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003269 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003270 xmlChar *tmp;
3271
Owen Taylor3473f882001-02-23 17:55:21 +00003272 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003273 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3274 if (tmp == NULL) {
3275 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003276 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003277 ctxt->instate = (xmlParserInputState) state;
3278 return(NULL);
3279 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003280 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003281 }
3282 count++;
3283 if (count > 50) {
3284 GROW;
3285 count = 0;
3286 }
3287 COPY_BUF(l,buf,len,cur);
3288 NEXTL(l);
3289 cur = CUR_CHAR(l);
3290 if (cur == 0) {
3291 GROW;
3292 SHRINK;
3293 cur = CUR_CHAR(l);
3294 }
3295 }
3296 buf[len] = 0;
3297 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003298 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003299 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003300 } else {
3301 NEXT;
3302 }
3303 return(buf);
3304}
3305
3306/**
3307 * xmlParsePubidLiteral:
3308 * @ctxt: an XML parser context
3309 *
3310 * parse an XML public literal
3311 *
3312 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3313 *
3314 * Returns the PubidLiteral parsed or NULL.
3315 */
3316
3317xmlChar *
3318xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3319 xmlChar *buf = NULL;
3320 int len = 0;
3321 int size = XML_PARSER_BUFFER_SIZE;
3322 xmlChar cur;
3323 xmlChar stop;
3324 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003325 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003326
3327 SHRINK;
3328 if (RAW == '"') {
3329 NEXT;
3330 stop = '"';
3331 } else if (RAW == '\'') {
3332 NEXT;
3333 stop = '\'';
3334 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003336 return(NULL);
3337 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003338 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003340 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 return(NULL);
3342 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003343 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003344 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003345 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003346 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003347 xmlChar *tmp;
3348
Owen Taylor3473f882001-02-23 17:55:21 +00003349 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003350 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3351 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003352 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003353 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003354 return(NULL);
3355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 }
3358 buf[len++] = cur;
3359 count++;
3360 if (count > 50) {
3361 GROW;
3362 count = 0;
3363 }
3364 NEXT;
3365 cur = CUR;
3366 if (cur == 0) {
3367 GROW;
3368 SHRINK;
3369 cur = CUR;
3370 }
3371 }
3372 buf[len] = 0;
3373 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003375 } else {
3376 NEXT;
3377 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003378 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003379 return(buf);
3380}
3381
/*
 * Forward declaration: xmlParseCharData() below hands over to
 * xmlParseCharDataComplex(), which is only defined after it.
 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003382void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003383
/*
 * Lookup table driving the inner loop of the character data fast path:
 * a non-zero entry means the byte can be skipped over as plain text.
 * Zero entries are the bytes that need a closer look: every control
 * character except TAB (0x09), the markup characters '&' (0x26) and
 * '<' (0x3C), ']' (0x5D, possible start of "]]>") and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF (non-ASCII): left out, hence initialized to zero */
};
3421
Owen Taylor3473f882001-02-23 17:55:21 +00003422/**
3423 * xmlParseCharData:
3424 * @ctxt: an XML parser context
3425 * @cdata: int indicating whether we are within a CDATA section
3426 *
3427 * parse a CharData section.
3428 * if we are within a CDATA section ']]>' marks an end of section.
3429 *
3430 * The right angle bracket (>) may be represented using the string "&gt;",
3431 * and must, for compatibility, be escaped using "&gt;" or a character
3432 * reference when it appears in the string "]]>" in content, when that
3433 * string is not marking the end of a CDATA section.
3434 *
3435 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3436 */
3437
3438void
3439xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003440 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003441 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003442 int line = ctxt->input->line;
3443 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003444 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445
3446 SHRINK;
3447 GROW;
3448 /*
3449 * Accelerated common case where input don't need to be
3450 * modified before passing it to the handler.
3451 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003452 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 in = ctxt->input->cur;
3454 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003455get_more_space:
3456 while (*in == 0x20) in++;
3457 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003458 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003459 in++;
3460 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003461 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003462 in++;
3463 }
3464 goto get_more_space;
3465 }
3466 if (*in == '<') {
3467 nbchar = in - ctxt->input->cur;
3468 if (nbchar > 0) {
3469 const xmlChar *tmp = ctxt->input->cur;
3470 ctxt->input->cur = in;
3471
Daniel Veillard34099b42004-11-04 17:34:35 +00003472 if ((ctxt->sax != NULL) &&
3473 (ctxt->sax->ignorableWhitespace !=
3474 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003475 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003476 if (ctxt->sax->ignorableWhitespace != NULL)
3477 ctxt->sax->ignorableWhitespace(ctxt->userData,
3478 tmp, nbchar);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003479 } else if (ctxt->sax->characters != NULL)
3480 ctxt->sax->characters(ctxt->userData,
3481 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003482 } else if ((ctxt->sax != NULL) &&
3483 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003484 ctxt->sax->characters(ctxt->userData,
3485 tmp, nbchar);
3486 }
3487 }
3488 return;
3489 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003490
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003491get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003492 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003493 while (test_char_data[*in]) {
3494 in++;
3495 ccol++;
3496 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003497 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003498 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003499 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003500 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003501 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003502 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003503 in++;
3504 }
3505 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003506 }
3507 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003508 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003509 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003510 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003511 return;
3512 }
3513 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003514 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003515 goto get_more;
3516 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003517 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003518 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003519 if ((ctxt->sax != NULL) &&
3520 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003521 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003522 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003523 const xmlChar *tmp = ctxt->input->cur;
3524 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003525
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003526 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003527 if (ctxt->sax->ignorableWhitespace != NULL)
3528 ctxt->sax->ignorableWhitespace(ctxt->userData,
3529 tmp, nbchar);
Daniel Veillard40412cd2003-09-03 13:28:32 +00003530 } else if (ctxt->sax->characters != NULL)
3531 ctxt->sax->characters(ctxt->userData,
3532 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003533 line = ctxt->input->line;
3534 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003535 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003536 if (ctxt->sax->characters != NULL)
3537 ctxt->sax->characters(ctxt->userData,
3538 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003539 line = ctxt->input->line;
3540 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003541 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003542 }
3543 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003544 if (*in == 0xD) {
3545 in++;
William M. Brackdc904f12005-10-22 02:04:26 +00003546 if (!*in) /* if end of current chunk return */
3547 return;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003548 if (*in == 0xA) {
3549 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003550 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003551 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003552 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003553 }
William M. Brackf4caa5e2005-10-20 09:04:05 +00003554 if (!*in) /* if end of current chunk return */
3555 return;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003556 in--;
3557 }
3558 if (*in == '<') {
3559 return;
3560 }
3561 if (*in == '&') {
3562 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003563 }
3564 SHRINK;
3565 GROW;
3566 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003567 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003568 nbchar = 0;
3569 }
Daniel Veillard50582112001-03-26 22:52:16 +00003570 ctxt->input->line = line;
3571 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003572 xmlParseCharDataComplex(ctxt, cdata);
3573}
3574
Daniel Veillard01c13b52002-12-10 15:19:08 +00003575/**
3576 * xmlParseCharDataComplex:
3577 * @ctxt: an XML parser context
3578 * @cdata: int indicating whether we are within a CDATA section
3579 *
3580 * parse a CharData section.this is the fallback function
3581 * of xmlParseCharData() when the parsing requires handling
3582 * of non-ASCII characters.
3583 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003584void
3585xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003586 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3587 int nbchar = 0;
3588 int cur, l;
3589 int count = 0;
3590
3591 SHRINK;
3592 GROW;
3593 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003594 while ((cur != '<') && /* checked */
3595 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003596 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003597 if ((cur == ']') && (NXT(1) == ']') &&
3598 (NXT(2) == '>')) {
3599 if (cdata) break;
3600 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003601 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003602 }
3603 }
3604 COPY_BUF(l,buf,nbchar,cur);
3605 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003606 buf[nbchar] = 0;
3607
Owen Taylor3473f882001-02-23 17:55:21 +00003608 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003609 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003610 */
3611 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003612 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003613 if (ctxt->sax->ignorableWhitespace != NULL)
3614 ctxt->sax->ignorableWhitespace(ctxt->userData,
3615 buf, nbchar);
3616 } else {
3617 if (ctxt->sax->characters != NULL)
3618 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3619 }
3620 }
3621 nbchar = 0;
3622 }
3623 count++;
3624 if (count > 50) {
3625 GROW;
3626 count = 0;
3627 }
3628 NEXTL(l);
3629 cur = CUR_CHAR(l);
3630 }
3631 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003632 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003633 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003634 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003635 */
3636 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003637 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003638 if (ctxt->sax->ignorableWhitespace != NULL)
3639 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3640 } else {
3641 if (ctxt->sax->characters != NULL)
3642 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3643 }
3644 }
3645 }
3646}
3647
3648/**
3649 * xmlParseExternalID:
3650 * @ctxt: an XML parser context
3651 * @publicID: a xmlChar** receiving PubidLiteral
3652 * @strict: indicate whether we should restrict parsing to only
3653 * production [75], see NOTE below
3654 *
3655 * Parse an External ID or a Public ID
3656 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003657 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003658 * 'PUBLIC' S PubidLiteral S SystemLiteral
3659 *
3660 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3661 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3662 *
3663 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3664 *
3665 * Returns the function returns SystemLiteral and in the second
3666 * case publicID receives PubidLiteral, is strict is off
3667 * it is possible to return NULL and have publicID set.
3668 */
3669
3670xmlChar *
3671xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3672 xmlChar *URI = NULL;
3673
3674 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003675
3676 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003677 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003678 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003679 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003680 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3681 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003682 }
3683 SKIP_BLANKS;
3684 URI = xmlParseSystemLiteral(ctxt);
3685 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003686 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003687 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003688 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003689 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003690 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003692 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003693 }
3694 SKIP_BLANKS;
3695 *publicID = xmlParsePubidLiteral(ctxt);
3696 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003697 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003698 }
3699 if (strict) {
3700 /*
3701 * We don't handle [83] so "S SystemLiteral" is required.
3702 */
William M. Brack76e95df2003-10-18 16:20:14 +00003703 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003704 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003705 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003706 }
3707 } else {
3708 /*
3709 * We handle [83] so we return immediately, if
3710 * "S SystemLiteral" is not detected. From a purely parsing
3711 * point of view that's a nice mess.
3712 */
3713 const xmlChar *ptr;
3714 GROW;
3715
3716 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003717 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003718
William M. Brack76e95df2003-10-18 16:20:14 +00003719 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3721 }
3722 SKIP_BLANKS;
3723 URI = xmlParseSystemLiteral(ctxt);
3724 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 }
3727 }
3728 return(URI);
3729}
3730
3731/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003732 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003733 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003734 * @buf: the already parsed part of the buffer
3735 * @len: number of bytes filles in the buffer
3736 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003737 *
3738 * Skip an XML (SGML) comment <!-- .... -->
3739 * The spec says that "For compatibility, the string "--" (double-hyphen)
3740 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003741 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003742 *
3743 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3744 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003745static void
3746xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003747 int q, ql;
3748 int r, rl;
3749 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003750 xmlParserInputPtr input = ctxt->input;
3751 int count = 0;
3752
Owen Taylor3473f882001-02-23 17:55:21 +00003753 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003754 len = 0;
3755 size = XML_PARSER_BUFFER_SIZE;
3756 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3757 if (buf == NULL) {
3758 xmlErrMemory(ctxt, NULL);
3759 return;
3760 }
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003763 if (q == 0)
3764 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003765 NEXTL(ql);
3766 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003767 if (r == 0)
3768 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003769 NEXTL(rl);
3770 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003771 if (cur == 0)
3772 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003773 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003774 ((cur != '>') ||
3775 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003776 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003777 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 }
3779 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003780 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003781 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003782 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3783 if (new_buf == NULL) {
3784 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003785 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003786 return;
3787 }
William M. Bracka3215c72004-07-31 16:24:01 +00003788 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003789 }
3790 COPY_BUF(ql,buf,len,q);
3791 q = r;
3792 ql = rl;
3793 r = cur;
3794 rl = l;
3795
3796 count++;
3797 if (count > 50) {
3798 GROW;
3799 count = 0;
3800 }
3801 NEXTL(l);
3802 cur = CUR_CHAR(l);
3803 if (cur == 0) {
3804 SHRINK;
3805 GROW;
3806 cur = CUR_CHAR(l);
3807 }
3808 }
3809 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003810 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003811 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003812 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003813 xmlFree(buf);
3814 } else {
3815 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003816 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3817 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003818 }
3819 NEXT;
3820 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3821 (!ctxt->disableSAX))
3822 ctxt->sax->comment(ctxt->userData, buf);
3823 xmlFree(buf);
3824 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003825 return;
3826not_terminated:
3827 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3828 "Comment not terminated\n", NULL);
3829 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003830}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003831/**
3832 * xmlParseComment:
3833 * @ctxt: an XML parser context
3834 *
3835 * Skip an XML (SGML) comment <!-- .... -->
3836 * The spec says that "For compatibility, the string "--" (double-hyphen)
3837 * must not occur within comments. "
3838 *
3839 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3840 */
3841void
3842xmlParseComment(xmlParserCtxtPtr ctxt) {
3843 xmlChar *buf = NULL;
3844 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003845 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003846 xmlParserInputState state;
3847 const xmlChar *in;
3848 int nbchar = 0, ccol;
3849
3850 /*
3851 * Check that there is a comment right here.
3852 */
3853 if ((RAW != '<') || (NXT(1) != '!') ||
3854 (NXT(2) != '-') || (NXT(3) != '-')) return;
3855
3856 state = ctxt->instate;
3857 ctxt->instate = XML_PARSER_COMMENT;
3858 SKIP(4);
3859 SHRINK;
3860 GROW;
3861
3862 /*
3863 * Accelerated common case where input don't need to be
3864 * modified before passing it to the handler.
3865 */
3866 in = ctxt->input->cur;
3867 do {
3868 if (*in == 0xA) {
3869 ctxt->input->line++; ctxt->input->col = 1;
3870 in++;
3871 while (*in == 0xA) {
3872 ctxt->input->line++; ctxt->input->col = 1;
3873 in++;
3874 }
3875 }
3876get_more:
3877 ccol = ctxt->input->col;
3878 while (((*in > '-') && (*in <= 0x7F)) ||
3879 ((*in >= 0x20) && (*in < '-')) ||
3880 (*in == 0x09)) {
3881 in++;
3882 ccol++;
3883 }
3884 ctxt->input->col = ccol;
3885 if (*in == 0xA) {
3886 ctxt->input->line++; ctxt->input->col = 1;
3887 in++;
3888 while (*in == 0xA) {
3889 ctxt->input->line++; ctxt->input->col = 1;
3890 in++;
3891 }
3892 goto get_more;
3893 }
3894 nbchar = in - ctxt->input->cur;
3895 /*
3896 * save current set of data
3897 */
3898 if (nbchar > 0) {
3899 if ((ctxt->sax != NULL) &&
3900 (ctxt->sax->comment != NULL)) {
3901 if (buf == NULL) {
3902 if ((*in == '-') && (in[1] == '-'))
3903 size = nbchar + 1;
3904 else
3905 size = XML_PARSER_BUFFER_SIZE + nbchar;
3906 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3907 if (buf == NULL) {
3908 xmlErrMemory(ctxt, NULL);
3909 ctxt->instate = state;
3910 return;
3911 }
3912 len = 0;
3913 } else if (len + nbchar + 1 >= size) {
3914 xmlChar *new_buf;
3915 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3916 new_buf = (xmlChar *) xmlRealloc(buf,
3917 size * sizeof(xmlChar));
3918 if (new_buf == NULL) {
3919 xmlFree (buf);
3920 xmlErrMemory(ctxt, NULL);
3921 ctxt->instate = state;
3922 return;
3923 }
3924 buf = new_buf;
3925 }
3926 memcpy(&buf[len], ctxt->input->cur, nbchar);
3927 len += nbchar;
3928 buf[len] = 0;
3929 }
3930 }
3931 ctxt->input->cur = in;
3932 if (*in == 0xA)
3933
3934 if (*in == 0xD) {
3935 in++;
3936 if (*in == 0xA) {
3937 ctxt->input->cur = in;
3938 in++;
3939 ctxt->input->line++; ctxt->input->col = 1;
3940 continue; /* while */
3941 }
William M. Brackdc904f12005-10-22 02:04:26 +00003942 if (!*in) /* if end of current chunk return */
3943 return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003944 in--;
3945 }
3946 SHRINK;
3947 GROW;
3948 in = ctxt->input->cur;
3949 if (*in == '-') {
3950 if (in[1] == '-') {
3951 if (in[2] == '>') {
3952 SKIP(3);
3953 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3954 (!ctxt->disableSAX)) {
3955 if (buf != NULL)
3956 ctxt->sax->comment(ctxt->userData, buf);
3957 else
3958 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3959 }
3960 if (buf != NULL)
3961 xmlFree(buf);
3962 ctxt->instate = state;
3963 return;
3964 }
3965 if (buf != NULL)
3966 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3967 "Comment not terminated \n<!--%.50s\n",
3968 buf);
3969 else
3970 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3971 "Comment not terminated \n", NULL);
3972 in++;
3973 ctxt->input->col++;
3974 }
3975 in++;
3976 ctxt->input->col++;
3977 goto get_more;
3978 }
3979 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3980 xmlParseCommentComplex(ctxt, buf, len, size);
3981 ctxt->instate = state;
3982 return;
3983}
3984
Owen Taylor3473f882001-02-23 17:55:21 +00003985
3986/**
3987 * xmlParsePITarget:
3988 * @ctxt: an XML parser context
3989 *
3990 * parse the name of a PI
3991 *
3992 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3993 *
3994 * Returns the PITarget name or NULL
3995 */
3996
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003997const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003998xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003999 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004000
4001 name = xmlParseName(ctxt);
4002 if ((name != NULL) &&
4003 ((name[0] == 'x') || (name[0] == 'X')) &&
4004 ((name[1] == 'm') || (name[1] == 'M')) &&
4005 ((name[2] == 'l') || (name[2] == 'L'))) {
4006 int i;
4007 if ((name[0] == 'x') && (name[1] == 'm') &&
4008 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004009 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004010 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004011 return(name);
4012 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004013 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 return(name);
4015 }
4016 for (i = 0;;i++) {
4017 if (xmlW3CPIs[i] == NULL) break;
4018 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4019 return(name);
4020 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004021 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4022 "xmlParsePITarget: invalid name prefix 'xml'\n",
4023 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004024 }
4025 return(name);
4026}
4027
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the pseudo-attribute
 *   catalog = ("'" URL "'" | '"' URL '"')
 * with optional blanks around each token and nothing else after the
 * closing quote. On success the URL is added to the catalogs of the
 * parsing context so it can be used for resolutions further down in
 * the document. A missing '=' makes the function return silently;
 * every other syntax problem produces an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *scan = catalog;
    const xmlChar *start;
    xmlChar quote;

    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    if (xmlStrncmp(scan, BAD_CAST"catalog", 7) != 0)
        goto error;
    scan += 7;

    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    if (*scan != '=')
        return;
    scan++;

    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    quote = *scan;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    scan++;

    /* Locate the matching closing quote. */
    start = scan;
    while ((*scan != 0) && (*scan != quote)) {
        scan++;
    }
    if (*scan == 0)
        goto error;
    URL = xmlStrndup(start, scan - start);
    scan++;

    /* Only blanks may follow the quoted value. */
    while (IS_BLANK_CH(*scan)) {
        scan++;
    }
    if (*scan != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4089
Owen Taylor3473f882001-02-23 17:55:21 +00004090/**
4091 * xmlParsePI:
4092 * @ctxt: an XML parser context
4093 *
4094 * parse an XML Processing Instruction.
4095 *
4096 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4097 *
4098 * The processing is transfered to SAX once parsed.
4099 */
4100
4101void
4102xmlParsePI(xmlParserCtxtPtr ctxt) {
4103 xmlChar *buf = NULL;
4104 int len = 0;
4105 int size = XML_PARSER_BUFFER_SIZE;
4106 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004107 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004108 xmlParserInputState state;
4109 int count = 0;
4110
4111 if ((RAW == '<') && (NXT(1) == '?')) {
4112 xmlParserInputPtr input = ctxt->input;
4113 state = ctxt->instate;
4114 ctxt->instate = XML_PARSER_PI;
4115 /*
4116 * this is a Processing Instruction.
4117 */
4118 SKIP(2);
4119 SHRINK;
4120
4121 /*
4122 * Parse the target name and check for special support like
4123 * namespace.
4124 */
4125 target = xmlParsePITarget(ctxt);
4126 if (target != NULL) {
4127 if ((RAW == '?') && (NXT(1) == '>')) {
4128 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004129 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4130 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004131 }
4132 SKIP(2);
4133
4134 /*
4135 * SAX: PI detected.
4136 */
4137 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4138 (ctxt->sax->processingInstruction != NULL))
4139 ctxt->sax->processingInstruction(ctxt->userData,
4140 target, NULL);
4141 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004142 return;
4143 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004144 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004145 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004146 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004147 ctxt->instate = state;
4148 return;
4149 }
4150 cur = CUR;
4151 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004152 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4153 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004154 }
4155 SKIP_BLANKS;
4156 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004157 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004158 ((cur != '?') || (NXT(1) != '>'))) {
4159 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004160 xmlChar *tmp;
4161
Owen Taylor3473f882001-02-23 17:55:21 +00004162 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004163 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4164 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004165 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004166 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004167 ctxt->instate = state;
4168 return;
4169 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004170 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004171 }
4172 count++;
4173 if (count > 50) {
4174 GROW;
4175 count = 0;
4176 }
4177 COPY_BUF(l,buf,len,cur);
4178 NEXTL(l);
4179 cur = CUR_CHAR(l);
4180 if (cur == 0) {
4181 SHRINK;
4182 GROW;
4183 cur = CUR_CHAR(l);
4184 }
4185 }
4186 buf[len] = 0;
4187 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004188 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4189 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004190 } else {
4191 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004192 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4193 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004194 }
4195 SKIP(2);
4196
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004197#ifdef LIBXML_CATALOG_ENABLED
4198 if (((state == XML_PARSER_MISC) ||
4199 (state == XML_PARSER_START)) &&
4200 (xmlStrEqual(target, XML_CATALOG_PI))) {
4201 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4202 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4203 (allow == XML_CATA_ALLOW_ALL))
4204 xmlParseCatalogPI(ctxt, buf);
4205 }
4206#endif
4207
4208
Owen Taylor3473f882001-02-23 17:55:21 +00004209 /*
4210 * SAX: PI detected.
4211 */
4212 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4213 (ctxt->sax->processingInstruction != NULL))
4214 ctxt->sax->processingInstruction(ctxt->userData,
4215 target, buf);
4216 }
4217 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004218 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004219 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004220 }
4221 ctxt->instate = state;
4222 }
4223}
4224
4225/**
4226 * xmlParseNotationDecl:
4227 * @ctxt: an XML parser context
4228 *
4229 * parse a notation declaration
4230 *
4231 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4232 *
4233 * Hence there is actually 3 choices:
4234 * 'PUBLIC' S PubidLiteral
4235 * 'PUBLIC' S PubidLiteral S SystemLiteral
4236 * and 'SYSTEM' S SystemLiteral
4237 *
4238 * See the NOTE on xmlParseExternalID().
4239 */
4240
4241void
4242xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004243 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004244 xmlChar *Pubid;
4245 xmlChar *Systemid;
4246
Daniel Veillarda07050d2003-10-19 14:46:32 +00004247 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004248 xmlParserInputPtr input = ctxt->input;
4249 SHRINK;
4250 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004251 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4253 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 return;
4255 }
4256 SKIP_BLANKS;
4257
Daniel Veillard76d66f42001-05-16 21:05:17 +00004258 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004259 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004260 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004261 return;
4262 }
William M. Brack76e95df2003-10-18 16:20:14 +00004263 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004265 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004266 return;
4267 }
4268 SKIP_BLANKS;
4269
4270 /*
4271 * Parse the IDs.
4272 */
4273 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4274 SKIP_BLANKS;
4275
4276 if (RAW == '>') {
4277 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004278 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4279 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004280 }
4281 NEXT;
4282 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4283 (ctxt->sax->notationDecl != NULL))
4284 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4285 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004286 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004287 }
Owen Taylor3473f882001-02-23 17:55:21 +00004288 if (Systemid != NULL) xmlFree(Systemid);
4289 if (Pubid != NULL) xmlFree(Pubid);
4290 }
4291}
4292
4293/**
4294 * xmlParseEntityDecl:
4295 * @ctxt: an XML parser context
4296 *
4297 * parse <!ENTITY declarations
4298 *
4299 * [70] EntityDecl ::= GEDecl | PEDecl
4300 *
4301 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4302 *
4303 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4304 *
4305 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4306 *
4307 * [74] PEDef ::= EntityValue | ExternalID
4308 *
4309 * [76] NDataDecl ::= S 'NDATA' S Name
4310 *
4311 * [ VC: Notation Declared ]
4312 * The Name must match the declared name of a notation.
4313 */
4314
4315void
4316xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004317 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 xmlChar *value = NULL;
4319 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004320 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004321 int isParameter = 0;
4322 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004323 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004324
Daniel Veillard4c778d82005-01-23 17:37:44 +00004325 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004326 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004327 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004328 SHRINK;
4329 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004330 skipped = SKIP_BLANKS;
4331 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004332 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4333 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
Owen Taylor3473f882001-02-23 17:55:21 +00004335
4336 if (RAW == '%') {
4337 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004338 skipped = SKIP_BLANKS;
4339 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4341 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
Owen Taylor3473f882001-02-23 17:55:21 +00004343 isParameter = 1;
4344 }
4345
Daniel Veillard76d66f42001-05-16 21:05:17 +00004346 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004347 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004348 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4349 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004350 return;
4351 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004352 skipped = SKIP_BLANKS;
4353 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004354 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4355 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004356 }
Owen Taylor3473f882001-02-23 17:55:21 +00004357
Daniel Veillardf5582f12002-06-11 10:08:16 +00004358 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 /*
4360 * handle the various case of definitions...
4361 */
4362 if (isParameter) {
4363 if ((RAW == '"') || (RAW == '\'')) {
4364 value = xmlParseEntityValue(ctxt, &orig);
4365 if (value) {
4366 if ((ctxt->sax != NULL) &&
4367 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4368 ctxt->sax->entityDecl(ctxt->userData, name,
4369 XML_INTERNAL_PARAMETER_ENTITY,
4370 NULL, NULL, value);
4371 }
4372 } else {
4373 URI = xmlParseExternalID(ctxt, &literal, 1);
4374 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004375 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004376 }
4377 if (URI) {
4378 xmlURIPtr uri;
4379
4380 uri = xmlParseURI((const char *) URI);
4381 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004382 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4383 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004384 /*
4385 * This really ought to be a well formedness error
4386 * but the XML Core WG decided otherwise c.f. issue
4387 * E26 of the XML erratas.
4388 */
Owen Taylor3473f882001-02-23 17:55:21 +00004389 } else {
4390 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004391 /*
4392 * Okay this is foolish to block those but not
4393 * invalid URIs.
4394 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004395 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004396 } else {
4397 if ((ctxt->sax != NULL) &&
4398 (!ctxt->disableSAX) &&
4399 (ctxt->sax->entityDecl != NULL))
4400 ctxt->sax->entityDecl(ctxt->userData, name,
4401 XML_EXTERNAL_PARAMETER_ENTITY,
4402 literal, URI, NULL);
4403 }
4404 xmlFreeURI(uri);
4405 }
4406 }
4407 }
4408 } else {
4409 if ((RAW == '"') || (RAW == '\'')) {
4410 value = xmlParseEntityValue(ctxt, &orig);
4411 if ((ctxt->sax != NULL) &&
4412 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4413 ctxt->sax->entityDecl(ctxt->userData, name,
4414 XML_INTERNAL_GENERAL_ENTITY,
4415 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004416 /*
4417 * For expat compatibility in SAX mode.
4418 */
4419 if ((ctxt->myDoc == NULL) ||
4420 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4421 if (ctxt->myDoc == NULL) {
4422 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4423 }
4424 if (ctxt->myDoc->intSubset == NULL)
4425 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4426 BAD_CAST "fake", NULL, NULL);
4427
Daniel Veillard1af9a412003-08-20 22:54:39 +00004428 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4429 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004430 }
Owen Taylor3473f882001-02-23 17:55:21 +00004431 } else {
4432 URI = xmlParseExternalID(ctxt, &literal, 1);
4433 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004434 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004435 }
4436 if (URI) {
4437 xmlURIPtr uri;
4438
4439 uri = xmlParseURI((const char *)URI);
4440 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004441 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4442 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004443 /*
4444 * This really ought to be a well formedness error
4445 * but the XML Core WG decided otherwise c.f. issue
4446 * E26 of the XML erratas.
4447 */
Owen Taylor3473f882001-02-23 17:55:21 +00004448 } else {
4449 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004450 /*
4451 * Okay this is foolish to block those but not
4452 * invalid URIs.
4453 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004454 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004455 }
4456 xmlFreeURI(uri);
4457 }
4458 }
William M. Brack76e95df2003-10-18 16:20:14 +00004459 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004460 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4461 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004462 }
4463 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004464 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004465 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004466 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4468 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004469 }
4470 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004471 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004472 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4473 (ctxt->sax->unparsedEntityDecl != NULL))
4474 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4475 literal, URI, ndata);
4476 } else {
4477 if ((ctxt->sax != NULL) &&
4478 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4479 ctxt->sax->entityDecl(ctxt->userData, name,
4480 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4481 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004482 /*
4483 * For expat compatibility in SAX mode.
4484 * assuming the entity repalcement was asked for
4485 */
4486 if ((ctxt->replaceEntities != 0) &&
4487 ((ctxt->myDoc == NULL) ||
4488 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4489 if (ctxt->myDoc == NULL) {
4490 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4491 }
4492
4493 if (ctxt->myDoc->intSubset == NULL)
4494 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4495 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004496 xmlSAX2EntityDecl(ctxt, name,
4497 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4498 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004499 }
Owen Taylor3473f882001-02-23 17:55:21 +00004500 }
4501 }
4502 }
4503 SKIP_BLANKS;
4504 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004505 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004506 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004507 } else {
4508 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004509 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4510 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
4512 NEXT;
4513 }
4514 if (orig != NULL) {
4515 /*
4516 * Ugly mechanism to save the raw entity value.
4517 */
4518 xmlEntityPtr cur = NULL;
4519
4520 if (isParameter) {
4521 if ((ctxt->sax != NULL) &&
4522 (ctxt->sax->getParameterEntity != NULL))
4523 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4524 } else {
4525 if ((ctxt->sax != NULL) &&
4526 (ctxt->sax->getEntity != NULL))
4527 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004528 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004529 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004530 }
Owen Taylor3473f882001-02-23 17:55:21 +00004531 }
4532 if (cur != NULL) {
4533 if (cur->orig != NULL)
4534 xmlFree(orig);
4535 else
4536 cur->orig = orig;
4537 } else
4538 xmlFree(orig);
4539 }
Owen Taylor3473f882001-02-23 17:55:21 +00004540 if (value != NULL) xmlFree(value);
4541 if (URI != NULL) xmlFree(URI);
4542 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004543 }
4544}
4545
4546/**
4547 * xmlParseDefaultDecl:
4548 * @ctxt: an XML parser context
4549 * @value: Receive a possible fixed default value for the attribute
4550 *
4551 * Parse an attribute default declaration
4552 *
4553 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4554 *
4555 * [ VC: Required Attribute ]
4556 * if the default declaration is the keyword #REQUIRED, then the
4557 * attribute must be specified for all elements of the type in the
4558 * attribute-list declaration.
4559 *
4560 * [ VC: Attribute Default Legal ]
4561 * The declared default value must meet the lexical constraints of
4562 * the declared attribute type c.f. xmlValidateAttributeDecl()
4563 *
4564 * [ VC: Fixed Attribute Default ]
4565 * if an attribute has a default value declared with the #FIXED
4566 * keyword, instances of that attribute must match the default value.
4567 *
4568 * [ WFC: No < in Attribute Values ]
4569 * handled in xmlParseAttValue()
4570 *
4571 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4572 * or XML_ATTRIBUTE_FIXED.
4573 */
4574
4575int
4576xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4577 int val;
4578 xmlChar *ret;
4579
4580 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004581 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004582 SKIP(9);
4583 return(XML_ATTRIBUTE_REQUIRED);
4584 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004585 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004586 SKIP(8);
4587 return(XML_ATTRIBUTE_IMPLIED);
4588 }
4589 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004590 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004591 SKIP(6);
4592 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004593 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004594 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4595 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004596 }
4597 SKIP_BLANKS;
4598 }
4599 ret = xmlParseAttValue(ctxt);
4600 ctxt->instate = XML_PARSER_DTD;
4601 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004602 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004603 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004604 } else
4605 *value = ret;
4606 return(val);
4607}
4608
4609/**
4610 * xmlParseNotationType:
4611 * @ctxt: an XML parser context
4612 *
4613 * parse an Notation attribute type.
4614 *
4615 * Note: the leading 'NOTATION' S part has already being parsed...
4616 *
4617 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4618 *
4619 * [ VC: Notation Attributes ]
4620 * Values of this type must match one of the notation names included
4621 * in the declaration; all notation names in the declaration must be declared.
4622 *
4623 * Returns: the notation attribute tree built while parsing
4624 */
4625
4626xmlEnumerationPtr
4627xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004628 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004629 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4630
4631 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004632 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004633 return(NULL);
4634 }
4635 SHRINK;
4636 do {
4637 NEXT;
4638 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004639 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004640 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004641 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4642 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004643 return(ret);
4644 }
4645 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004646 if (cur == NULL) return(ret);
4647 if (last == NULL) ret = last = cur;
4648 else {
4649 last->next = cur;
4650 last = cur;
4651 }
4652 SKIP_BLANKS;
4653 } while (RAW == '|');
4654 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004655 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004656 if ((last != NULL) && (last != ret))
4657 xmlFreeEnumeration(last);
4658 return(ret);
4659 }
4660 NEXT;
4661 return(ret);
4662}
4663
4664/**
4665 * xmlParseEnumerationType:
4666 * @ctxt: an XML parser context
4667 *
4668 * parse an Enumeration attribute type.
4669 *
4670 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4671 *
4672 * [ VC: Enumeration ]
4673 * Values of this type must match one of the Nmtoken tokens in
4674 * the declaration
4675 *
4676 * Returns: the enumeration attribute tree built while parsing
4677 */
4678
4679xmlEnumerationPtr
4680xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4681 xmlChar *name;
4682 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4683
4684 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004685 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004686 return(NULL);
4687 }
4688 SHRINK;
4689 do {
4690 NEXT;
4691 SKIP_BLANKS;
4692 name = xmlParseNmtoken(ctxt);
4693 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004694 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004695 return(ret);
4696 }
4697 cur = xmlCreateEnumeration(name);
4698 xmlFree(name);
4699 if (cur == NULL) return(ret);
4700 if (last == NULL) ret = last = cur;
4701 else {
4702 last->next = cur;
4703 last = cur;
4704 }
4705 SKIP_BLANKS;
4706 } while (RAW == '|');
4707 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004708 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004709 return(ret);
4710 }
4711 NEXT;
4712 return(ret);
4713}
4714
4715/**
4716 * xmlParseEnumeratedType:
4717 * @ctxt: an XML parser context
4718 * @tree: the enumeration tree built while parsing
4719 *
4720 * parse an Enumerated attribute type.
4721 *
4722 * [57] EnumeratedType ::= NotationType | Enumeration
4723 *
4724 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4725 *
4726 *
4727 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4728 */
4729
4730int
4731xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004732 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004733 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004734 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004735 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4736 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004737 return(0);
4738 }
4739 SKIP_BLANKS;
4740 *tree = xmlParseNotationType(ctxt);
4741 if (*tree == NULL) return(0);
4742 return(XML_ATTRIBUTE_NOTATION);
4743 }
4744 *tree = xmlParseEnumerationType(ctxt);
4745 if (*tree == NULL) return(0);
4746 return(XML_ATTRIBUTE_ENUMERATION);
4747}
4748
4749/**
4750 * xmlParseAttributeType:
4751 * @ctxt: an XML parser context
4752 * @tree: the enumeration tree built while parsing
4753 *
4754 * parse the Attribute list def for an element
4755 *
4756 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4757 *
4758 * [55] StringType ::= 'CDATA'
4759 *
4760 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4761 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4762 *
4763 * Validity constraints for attribute values syntax are checked in
4764 * xmlValidateAttributeValue()
4765 *
4766 * [ VC: ID ]
4767 * Values of type ID must match the Name production. A name must not
4768 * appear more than once in an XML document as a value of this type;
4769 * i.e., ID values must uniquely identify the elements which bear them.
4770 *
4771 * [ VC: One ID per Element Type ]
4772 * No element type may have more than one ID attribute specified.
4773 *
4774 * [ VC: ID Attribute Default ]
4775 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4776 *
4777 * [ VC: IDREF ]
4778 * Values of type IDREF must match the Name production, and values
4779 * of type IDREFS must match Names; each IDREF Name must match the value
4780 * of an ID attribute on some element in the XML document; i.e. IDREF
4781 * values must match the value of some ID attribute.
4782 *
4783 * [ VC: Entity Name ]
4784 * Values of type ENTITY must match the Name production, values
4785 * of type ENTITIES must match Names; each Entity Name must match the
4786 * name of an unparsed entity declared in the DTD.
4787 *
4788 * [ VC: Name Token ]
4789 * Values of type NMTOKEN must match the Nmtoken production; values
4790 * of type NMTOKENS must match Nmtokens.
4791 *
4792 * Returns the attribute type
4793 */
4794int
4795xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4796 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004797 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004798 SKIP(5);
4799 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004800 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004801 SKIP(6);
4802 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004803 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004804 SKIP(5);
4805 return(XML_ATTRIBUTE_IDREF);
4806 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4807 SKIP(2);
4808 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004809 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004810 SKIP(6);
4811 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004812 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004813 SKIP(8);
4814 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004815 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004816 SKIP(8);
4817 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004818 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004819 SKIP(7);
4820 return(XML_ATTRIBUTE_NMTOKEN);
4821 }
4822 return(xmlParseEnumeratedType(ctxt, tree));
4823}
4824
4825/**
4826 * xmlParseAttributeListDecl:
4827 * @ctxt: an XML parser context
4828 *
4829 * : parse the Attribute list def for an element
4830 *
4831 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4832 *
4833 * [53] AttDef ::= S Name S AttType S DefaultDecl
4834 *
4835 */
4836void
4837xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004838 const xmlChar *elemName;
4839 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004840 xmlEnumerationPtr tree;
4841
Daniel Veillarda07050d2003-10-19 14:46:32 +00004842 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004843 xmlParserInputPtr input = ctxt->input;
4844
4845 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004846 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004847 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004848 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004849 }
4850 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004851 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004852 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004853 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4854 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004855 return;
4856 }
4857 SKIP_BLANKS;
4858 GROW;
4859 while (RAW != '>') {
4860 const xmlChar *check = CUR_PTR;
4861 int type;
4862 int def;
4863 xmlChar *defaultValue = NULL;
4864
4865 GROW;
4866 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004867 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004868 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004869 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4870 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004871 break;
4872 }
4873 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004874 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004876 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004877 if (defaultValue != NULL)
4878 xmlFree(defaultValue);
4879 break;
4880 }
4881 SKIP_BLANKS;
4882
4883 type = xmlParseAttributeType(ctxt, &tree);
4884 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004885 if (defaultValue != NULL)
4886 xmlFree(defaultValue);
4887 break;
4888 }
4889
4890 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004891 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004892 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4893 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004894 if (defaultValue != NULL)
4895 xmlFree(defaultValue);
4896 if (tree != NULL)
4897 xmlFreeEnumeration(tree);
4898 break;
4899 }
4900 SKIP_BLANKS;
4901
4902 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4903 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004904 if (defaultValue != NULL)
4905 xmlFree(defaultValue);
4906 if (tree != NULL)
4907 xmlFreeEnumeration(tree);
4908 break;
4909 }
4910
4911 GROW;
4912 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004913 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004915 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004916 if (defaultValue != NULL)
4917 xmlFree(defaultValue);
4918 if (tree != NULL)
4919 xmlFreeEnumeration(tree);
4920 break;
4921 }
4922 SKIP_BLANKS;
4923 }
4924 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004925 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4926 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004927 if (defaultValue != NULL)
4928 xmlFree(defaultValue);
4929 if (tree != NULL)
4930 xmlFreeEnumeration(tree);
4931 break;
4932 }
4933 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4934 (ctxt->sax->attributeDecl != NULL))
4935 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4936 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004937 else if (tree != NULL)
4938 xmlFreeEnumeration(tree);
4939
4940 if ((ctxt->sax2) && (defaultValue != NULL) &&
4941 (def != XML_ATTRIBUTE_IMPLIED) &&
4942 (def != XML_ATTRIBUTE_REQUIRED)) {
4943 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4944 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004945 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4946 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4947 }
Owen Taylor3473f882001-02-23 17:55:21 +00004948 if (defaultValue != NULL)
4949 xmlFree(defaultValue);
4950 GROW;
4951 }
4952 if (RAW == '>') {
4953 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004954 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4955 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004956 }
4957 NEXT;
4958 }
Owen Taylor3473f882001-02-23 17:55:21 +00004959 }
4960}
4961
4962/**
4963 * xmlParseElementMixedContentDecl:
4964 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004965 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004966 *
4967 * parse the declaration for a Mixed Element content
4968 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4969 *
4970 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4971 * '(' S? '#PCDATA' S? ')'
4972 *
4973 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4974 *
4975 * [ VC: No Duplicate Types ]
4976 * The same name must not appear more than once in a single
4977 * mixed-content declaration.
4978 *
4979 * returns: the list of the xmlElementContentPtr describing the element choices
4980 */
4981xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004982xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004983 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004984 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004985
4986 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004987 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004988 SKIP(7);
4989 SKIP_BLANKS;
4990 SHRINK;
4991 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004992 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004993 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4994"Element content declaration doesn't start and stop in the same entity\n",
4995 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004996 }
Owen Taylor3473f882001-02-23 17:55:21 +00004997 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004998 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004999 if (RAW == '*') {
5000 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5001 NEXT;
5002 }
5003 return(ret);
5004 }
5005 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005006 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005007 if (ret == NULL) return(NULL);
5008 }
5009 while (RAW == '|') {
5010 NEXT;
5011 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005012 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005013 if (ret == NULL) return(NULL);
5014 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005015 if (cur != NULL)
5016 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005017 cur = ret;
5018 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005019 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005020 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005021 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005022 if (n->c1 != NULL)
5023 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005024 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005025 if (n != NULL)
5026 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005027 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005028 }
5029 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005030 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005031 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005032 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005033 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005034 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005035 return(NULL);
5036 }
5037 SKIP_BLANKS;
5038 GROW;
5039 }
5040 if ((RAW == ')') && (NXT(1) == '*')) {
5041 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005042 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005043 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005044 if (cur->c2 != NULL)
5045 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005046 }
5047 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005048 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005049 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5050"Element content declaration doesn't start and stop in the same entity\n",
5051 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005052 }
Owen Taylor3473f882001-02-23 17:55:21 +00005053 SKIP(2);
5054 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005055 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005056 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005057 return(NULL);
5058 }
5059
5060 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005061 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005062 }
5063 return(ret);
5064}
5065
5066/**
5067 * xmlParseElementChildrenContentDecl:
5068 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005069 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005070 *
5071 * parse the declaration for a Mixed Element content
5072 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5073 *
5074 *
5075 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5076 *
5077 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5078 *
5079 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5080 *
5081 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5082 *
5083 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5084 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005085 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * opening or closing parentheses in a choice, seq, or Mixed
5087 * construct is contained in the replacement text for a parameter
5088 * entity, both must be contained in the same replacement text. For
5089 * interoperability, if a parameter-entity reference appears in a
5090 * choice, seq, or Mixed construct, its replacement text should not
5091 * be empty, and neither the first nor last non-blank character of
5092 * the replacement text should be a connector (| or ,).
5093 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005094 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005095 * hierarchy.
5096 */
5097xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005098xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005099 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005100 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005101 xmlChar type = 0;
5102
5103 SKIP_BLANKS;
5104 GROW;
5105 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005106 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005107
Owen Taylor3473f882001-02-23 17:55:21 +00005108 /* Recurse on first child */
5109 NEXT;
5110 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005111 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005112 SKIP_BLANKS;
5113 GROW;
5114 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005115 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005116 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005117 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005118 return(NULL);
5119 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005120 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005121 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005122 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005123 return(NULL);
5124 }
Owen Taylor3473f882001-02-23 17:55:21 +00005125 GROW;
5126 if (RAW == '?') {
5127 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5128 NEXT;
5129 } else if (RAW == '*') {
5130 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5131 NEXT;
5132 } else if (RAW == '+') {
5133 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5134 NEXT;
5135 } else {
5136 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5137 }
Owen Taylor3473f882001-02-23 17:55:21 +00005138 GROW;
5139 }
5140 SKIP_BLANKS;
5141 SHRINK;
5142 while (RAW != ')') {
5143 /*
5144 * Each loop we parse one separator and one element.
5145 */
5146 if (RAW == ',') {
5147 if (type == 0) type = CUR;
5148
5149 /*
5150 * Detect "Name | Name , Name" error
5151 */
5152 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005153 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005154 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005155 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005156 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005157 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005158 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005159 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005160 return(NULL);
5161 }
5162 NEXT;
5163
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005164 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005165 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005166 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005167 xmlFreeDocElementContent(ctxt->myDoc, last);
5168 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005169 return(NULL);
5170 }
5171 if (last == NULL) {
5172 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005173 if (ret != NULL)
5174 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005175 ret = cur = op;
5176 } else {
5177 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005178 if (op != NULL)
5179 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005180 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005181 if (last != NULL)
5182 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005183 cur =op;
5184 last = NULL;
5185 }
5186 } else if (RAW == '|') {
5187 if (type == 0) type = CUR;
5188
5189 /*
5190 * Detect "Name , Name | Name" error
5191 */
5192 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005193 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005194 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005195 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005196 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005197 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005198 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005199 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 return(NULL);
5201 }
5202 NEXT;
5203
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005204 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005205 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005206 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005207 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005208 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005209 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 return(NULL);
5211 }
5212 if (last == NULL) {
5213 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005214 if (ret != NULL)
5215 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005216 ret = cur = op;
5217 } else {
5218 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005219 if (op != NULL)
5220 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005221 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005222 if (last != NULL)
5223 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005224 cur =op;
5225 last = NULL;
5226 }
5227 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005230 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 return(NULL);
5232 }
5233 GROW;
5234 SKIP_BLANKS;
5235 GROW;
5236 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005237 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005238 /* Recurse on second child */
5239 NEXT;
5240 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005241 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005242 SKIP_BLANKS;
5243 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005244 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005245 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005246 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005247 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005248 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005249 return(NULL);
5250 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005251 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00005252 if (RAW == '?') {
5253 last->ocur = XML_ELEMENT_CONTENT_OPT;
5254 NEXT;
5255 } else if (RAW == '*') {
5256 last->ocur = XML_ELEMENT_CONTENT_MULT;
5257 NEXT;
5258 } else if (RAW == '+') {
5259 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5260 NEXT;
5261 } else {
5262 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5263 }
5264 }
5265 SKIP_BLANKS;
5266 GROW;
5267 }
5268 if ((cur != NULL) && (last != NULL)) {
5269 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005270 if (last != NULL)
5271 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005272 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005273 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005274 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5275"Element content declaration doesn't start and stop in the same entity\n",
5276 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005277 }
Owen Taylor3473f882001-02-23 17:55:21 +00005278 NEXT;
5279 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005280 if (ret != NULL) {
5281 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5282 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5283 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5284 else
5285 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5286 }
Owen Taylor3473f882001-02-23 17:55:21 +00005287 NEXT;
5288 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005289 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005290 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005291 cur = ret;
5292 /*
5293 * Some normalization:
5294 * (a | b* | c?)* == (a | b | c)*
5295 */
5296 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5297 if ((cur->c1 != NULL) &&
5298 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5299 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5300 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5301 if ((cur->c2 != NULL) &&
5302 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5303 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5304 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5305 cur = cur->c2;
5306 }
5307 }
Owen Taylor3473f882001-02-23 17:55:21 +00005308 NEXT;
5309 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005310 if (ret != NULL) {
5311 int found = 0;
5312
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005313 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5314 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5315 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005316 else
5317 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005318 /*
5319 * Some normalization:
5320 * (a | b*)+ == (a | b)*
5321 * (a | b?)+ == (a | b)*
5322 */
5323 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5324 if ((cur->c1 != NULL) &&
5325 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5326 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5327 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5328 found = 1;
5329 }
5330 if ((cur->c2 != NULL) &&
5331 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5332 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5333 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5334 found = 1;
5335 }
5336 cur = cur->c2;
5337 }
5338 if (found)
5339 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5340 }
Owen Taylor3473f882001-02-23 17:55:21 +00005341 NEXT;
5342 }
5343 return(ret);
5344}
5345
5346/**
5347 * xmlParseElementContentDecl:
5348 * @ctxt: an XML parser context
5349 * @name: the name of the element being defined.
5350 * @result: the Element Content pointer will be stored here if any
5351 *
5352 * parse the declaration for an Element content either Mixed or Children,
5353 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5354 *
5355 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5356 *
5357 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5358 */
5359
5360int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005361xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005362 xmlElementContentPtr *result) {
5363
5364 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005365 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005366 int res;
5367
5368 *result = NULL;
5369
5370 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005371 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005372 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005373 return(-1);
5374 }
5375 NEXT;
5376 GROW;
5377 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005378 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005379 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 res = XML_ELEMENT_TYPE_MIXED;
5381 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005382 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005383 res = XML_ELEMENT_TYPE_ELEMENT;
5384 }
Owen Taylor3473f882001-02-23 17:55:21 +00005385 SKIP_BLANKS;
5386 *result = tree;
5387 return(res);
5388}
5389
5390/**
5391 * xmlParseElementDecl:
5392 * @ctxt: an XML parser context
5393 *
5394 * parse an Element declaration.
5395 *
5396 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5397 *
5398 * [ VC: Unique Element Type Declaration ]
5399 * No element type may be declared more than once
5400 *
5401 * Returns the type of the element, or -1 in case of error
5402 */
5403int
5404xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005405 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005406 int ret = -1;
5407 xmlElementContentPtr content = NULL;
5408
Daniel Veillard4c778d82005-01-23 17:37:44 +00005409 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005410 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005411 xmlParserInputPtr input = ctxt->input;
5412
5413 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005414 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005417 }
5418 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005419 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005421 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5422 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005423 return(-1);
5424 }
5425 while ((RAW == 0) && (ctxt->inputNr > 1))
5426 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005427 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005428 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5429 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005430 }
5431 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005432 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005433 SKIP(5);
5434 /*
5435 * Element must always be empty.
5436 */
5437 ret = XML_ELEMENT_TYPE_EMPTY;
5438 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5439 (NXT(2) == 'Y')) {
5440 SKIP(3);
5441 /*
5442 * Element is a generic container.
5443 */
5444 ret = XML_ELEMENT_TYPE_ANY;
5445 } else if (RAW == '(') {
5446 ret = xmlParseElementContentDecl(ctxt, name, &content);
5447 } else {
5448 /*
5449 * [ WFC: PEs in Internal Subset ] error handling.
5450 */
5451 if ((RAW == '%') && (ctxt->external == 0) &&
5452 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005453 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005454 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005455 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005456 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005457 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5458 }
Owen Taylor3473f882001-02-23 17:55:21 +00005459 return(-1);
5460 }
5461
5462 SKIP_BLANKS;
5463 /*
5464 * Pop-up of finished entities.
5465 */
5466 while ((RAW == 0) && (ctxt->inputNr > 1))
5467 xmlPopInput(ctxt);
5468 SKIP_BLANKS;
5469
5470 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005471 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005472 if (content != NULL) {
5473 xmlFreeDocElementContent(ctxt->myDoc, content);
5474 }
Owen Taylor3473f882001-02-23 17:55:21 +00005475 } else {
5476 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005477 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5478 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005479 }
5480
5481 NEXT;
5482 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005483 (ctxt->sax->elementDecl != NULL)) {
5484 if (content != NULL)
5485 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005486 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5487 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005488 if ((content != NULL) && (content->parent == NULL)) {
5489 /*
5490 * this is a trick: if xmlAddElementDecl is called,
5491 * instead of copying the full tree it is plugged directly
5492 * if called from the parser. Avoid duplicating the
5493 * interfaces or change the API/ABI
5494 */
5495 xmlFreeDocElementContent(ctxt->myDoc, content);
5496 }
5497 } else if (content != NULL) {
5498 xmlFreeDocElementContent(ctxt->myDoc, content);
5499 }
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
Owen Taylor3473f882001-02-23 17:55:21 +00005501 }
5502 return(ret);
5503}
5504
5505/**
Owen Taylor3473f882001-02-23 17:55:21 +00005506 * xmlParseConditionalSections
5507 * @ctxt: an XML parser context
5508 *
5509 * [61] conditionalSect ::= includeSect | ignoreSect
5510 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5511 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5512 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5513 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5514 */
5515
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005516static void
Owen Taylor3473f882001-02-23 17:55:21 +00005517xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5518 SKIP(3);
5519 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005520 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005521 SKIP(7);
5522 SKIP_BLANKS;
5523 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005524 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005525 } else {
5526 NEXT;
5527 }
5528 if (xmlParserDebugEntities) {
5529 if ((ctxt->input != NULL) && (ctxt->input->filename))
5530 xmlGenericError(xmlGenericErrorContext,
5531 "%s(%d): ", ctxt->input->filename,
5532 ctxt->input->line);
5533 xmlGenericError(xmlGenericErrorContext,
5534 "Entering INCLUDE Conditional Section\n");
5535 }
5536
5537 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5538 (NXT(2) != '>'))) {
5539 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005540 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005541
5542 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5543 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005544 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005545 NEXT;
5546 } else if (RAW == '%') {
5547 xmlParsePEReference(ctxt);
5548 } else
5549 xmlParseMarkupDecl(ctxt);
5550
5551 /*
5552 * Pop-up of finished entities.
5553 */
5554 while ((RAW == 0) && (ctxt->inputNr > 1))
5555 xmlPopInput(ctxt);
5556
Daniel Veillardfdc91562002-07-01 21:52:03 +00005557 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005558 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005559 break;
5560 }
5561 }
5562 if (xmlParserDebugEntities) {
5563 if ((ctxt->input != NULL) && (ctxt->input->filename))
5564 xmlGenericError(xmlGenericErrorContext,
5565 "%s(%d): ", ctxt->input->filename,
5566 ctxt->input->line);
5567 xmlGenericError(xmlGenericErrorContext,
5568 "Leaving INCLUDE Conditional Section\n");
5569 }
5570
Daniel Veillarda07050d2003-10-19 14:46:32 +00005571 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005572 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005573 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005574 int depth = 0;
5575
5576 SKIP(6);
5577 SKIP_BLANKS;
5578 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005579 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 } else {
5581 NEXT;
5582 }
5583 if (xmlParserDebugEntities) {
5584 if ((ctxt->input != NULL) && (ctxt->input->filename))
5585 xmlGenericError(xmlGenericErrorContext,
5586 "%s(%d): ", ctxt->input->filename,
5587 ctxt->input->line);
5588 xmlGenericError(xmlGenericErrorContext,
5589 "Entering IGNORE Conditional Section\n");
5590 }
5591
5592 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005593 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005594 * But disable SAX event generating DTD building in the meantime
5595 */
5596 state = ctxt->disableSAX;
5597 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005598 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005599 ctxt->instate = XML_PARSER_IGNORE;
5600
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005601 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005602 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5603 depth++;
5604 SKIP(3);
5605 continue;
5606 }
5607 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5608 if (--depth >= 0) SKIP(3);
5609 continue;
5610 }
5611 NEXT;
5612 continue;
5613 }
5614
5615 ctxt->disableSAX = state;
5616 ctxt->instate = instate;
5617
5618 if (xmlParserDebugEntities) {
5619 if ((ctxt->input != NULL) && (ctxt->input->filename))
5620 xmlGenericError(xmlGenericErrorContext,
5621 "%s(%d): ", ctxt->input->filename,
5622 ctxt->input->line);
5623 xmlGenericError(xmlGenericErrorContext,
5624 "Leaving IGNORE Conditional Section\n");
5625 }
5626
5627 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005628 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005629 }
5630
5631 if (RAW == 0)
5632 SHRINK;
5633
5634 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005635 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005636 } else {
5637 SKIP(3);
5638 }
5639}
5640
5641/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005642 * xmlParseMarkupDecl:
5643 * @ctxt: an XML parser context
5644 *
5645 * parse Markup declarations
5646 *
5647 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5648 * NotationDecl | PI | Comment
5649 *
5650 * [ VC: Proper Declaration/PE Nesting ]
5651 * Parameter-entity replacement text must be properly nested with
5652 * markup declarations. That is to say, if either the first character
5653 * or the last character of a markup declaration (markupdecl above) is
5654 * contained in the replacement text for a parameter-entity reference,
5655 * both must be contained in the same replacement text.
5656 *
5657 * [ WFC: PEs in Internal Subset ]
5658 * In the internal DTD subset, parameter-entity references can occur
5659 * only where markup declarations can occur, not within markup declarations.
5660 * (This does not apply to references that occur in external parameter
5661 * entities or to the external subset.)
5662 */
5663void
5664xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5665 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005666 if (CUR == '<') {
5667 if (NXT(1) == '!') {
5668 switch (NXT(2)) {
5669 case 'E':
5670 if (NXT(3) == 'L')
5671 xmlParseElementDecl(ctxt);
5672 else if (NXT(3) == 'N')
5673 xmlParseEntityDecl(ctxt);
5674 break;
5675 case 'A':
5676 xmlParseAttributeListDecl(ctxt);
5677 break;
5678 case 'N':
5679 xmlParseNotationDecl(ctxt);
5680 break;
5681 case '-':
5682 xmlParseComment(ctxt);
5683 break;
5684 default:
5685 /* there is an error but it will be detected later */
5686 break;
5687 }
5688 } else if (NXT(1) == '?') {
5689 xmlParsePI(ctxt);
5690 }
5691 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005692 /*
5693 * This is only for internal subset. On external entities,
5694 * the replacement is done before parsing stage
5695 */
5696 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5697 xmlParsePEReference(ctxt);
5698
5699 /*
5700 * Conditional sections are allowed from entities included
5701 * by PE References in the internal subset.
5702 */
5703 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5704 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5705 xmlParseConditionalSections(ctxt);
5706 }
5707 }
5708
5709 ctxt->instate = XML_PARSER_DTD;
5710}
5711
5712/**
5713 * xmlParseTextDecl:
5714 * @ctxt: an XML parser context
5715 *
5716 * parse an XML declaration header for external entities
5717 *
5718 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5719 *
5720 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5721 */
5722
5723void
5724xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5725 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005726 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005727
5728 /*
5729 * We know that '<?xml' is here.
5730 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005731 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005732 SKIP(5);
5733 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005734 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005735 return;
5736 }
5737
William M. Brack76e95df2003-10-18 16:20:14 +00005738 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005739 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5740 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005741 }
5742 SKIP_BLANKS;
5743
5744 /*
5745 * We may have the VersionInfo here.
5746 */
5747 version = xmlParseVersionInfo(ctxt);
5748 if (version == NULL)
5749 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005750 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005751 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5753 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005754 }
5755 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005756 ctxt->input->version = version;
5757
5758 /*
5759 * We must have the encoding declaration
5760 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005761 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005762 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5763 /*
5764 * The XML REC instructs us to stop parsing right here
5765 */
5766 return;
5767 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005768 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5769 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5770 "Missing encoding in text declaration\n");
5771 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005772
5773 SKIP_BLANKS;
5774 if ((RAW == '?') && (NXT(1) == '>')) {
5775 SKIP(2);
5776 } else if (RAW == '>') {
5777 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005778 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005779 NEXT;
5780 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005781 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005782 MOVETO_ENDTAG(CUR_PTR);
5783 NEXT;
5784 }
5785}
5786
5787/**
Owen Taylor3473f882001-02-23 17:55:21 +00005788 * xmlParseExternalSubset:
5789 * @ctxt: an XML parser context
5790 * @ExternalID: the external identifier
5791 * @SystemID: the system identifier (or URL)
5792 *
5793 * parse Markup declarations from an external subset
5794 *
5795 * [30] extSubset ::= textDecl? extSubsetDecl
5796 *
5797 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5798 */
5799void
5800xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5801 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005802 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005803 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005804 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005805 xmlParseTextDecl(ctxt);
5806 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5807 /*
5808 * The XML REC instructs us to stop parsing right here
5809 */
5810 ctxt->instate = XML_PARSER_EOF;
5811 return;
5812 }
5813 }
5814 if (ctxt->myDoc == NULL) {
5815 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5816 }
5817 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5818 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5819
5820 ctxt->instate = XML_PARSER_DTD;
5821 ctxt->external = 1;
5822 while (((RAW == '<') && (NXT(1) == '?')) ||
5823 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005824 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005825 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005826 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005827
5828 GROW;
5829 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5830 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005831 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005832 NEXT;
5833 } else if (RAW == '%') {
5834 xmlParsePEReference(ctxt);
5835 } else
5836 xmlParseMarkupDecl(ctxt);
5837
5838 /*
5839 * Pop-up of finished entities.
5840 */
5841 while ((RAW == 0) && (ctxt->inputNr > 1))
5842 xmlPopInput(ctxt);
5843
Daniel Veillardfdc91562002-07-01 21:52:03 +00005844 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005845 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 break;
5847 }
5848 }
5849
5850 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005851 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005852 }
5853
5854}
5855
5856/**
5857 * xmlParseReference:
5858 * @ctxt: an XML parser context
5859 *
5860 * parse and handle entity references in content, depending on the SAX
5861 * interface, this may end-up in a call to character() if this is a
5862 * CharRef, a predefined entity, if there is no reference() callback.
5863 * or if the parser was asked to switch to that mode.
5864 *
5865 * [67] Reference ::= EntityRef | CharRef
5866 */
5867void
5868xmlParseReference(xmlParserCtxtPtr ctxt) {
5869 xmlEntityPtr ent;
5870 xmlChar *val;
5871 if (RAW != '&') return;
5872
5873 if (NXT(1) == '#') {
5874 int i = 0;
5875 xmlChar out[10];
5876 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005877 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005878
5879 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5880 /*
5881 * So we are using non-UTF-8 buffers
5882 * Check that the char fit on 8bits, if not
5883 * generate a CharRef.
5884 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005885 if (value <= 0xFF) {
5886 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005887 out[1] = 0;
5888 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5889 (!ctxt->disableSAX))
5890 ctxt->sax->characters(ctxt->userData, out, 1);
5891 } else {
5892 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005893 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005894 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005895 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5897 (!ctxt->disableSAX))
5898 ctxt->sax->reference(ctxt->userData, out);
5899 }
5900 } else {
5901 /*
5902 * Just encode the value in UTF-8
5903 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005904 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005905 out[i] = 0;
5906 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5907 (!ctxt->disableSAX))
5908 ctxt->sax->characters(ctxt->userData, out, i);
5909 }
5910 } else {
5911 ent = xmlParseEntityRef(ctxt);
5912 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005913 if (!ctxt->wellFormed)
5914 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005915 if ((ent->name != NULL) &&
5916 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5917 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005918 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005919
5920
5921 /*
5922 * The first reference to the entity trigger a parsing phase
5923 * where the ent->children is filled with the result from
5924 * the parsing.
5925 */
5926 if (ent->children == NULL) {
5927 xmlChar *value;
5928 value = ent->content;
5929
5930 /*
5931 * Check that this entity is well formed
5932 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005933 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005934 (value[1] == 0) && (value[0] == '<') &&
5935 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5936 /*
5937 * DONE: get definite answer on this !!!
5938 * Lots of entity decls are used to declare a single
5939 * char
5940 * <!ENTITY lt "<">
5941 * Which seems to be valid since
5942 * 2.4: The ampersand character (&) and the left angle
5943 * bracket (<) may appear in their literal form only
5944 * when used ... They are also legal within the literal
5945 * entity value of an internal entity declaration;i
5946 * see "4.3.2 Well-Formed Parsed Entities".
5947 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5948 * Looking at the OASIS test suite and James Clark
5949 * tests, this is broken. However the XML REC uses
5950 * it. Is the XML REC not well-formed ????
5951 * This is a hack to avoid this problem
5952 *
5953 * ANSWER: since lt gt amp .. are already defined,
5954 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005955 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005956 * is lousy but acceptable.
5957 */
5958 list = xmlNewDocText(ctxt->myDoc, value);
5959 if (list != NULL) {
5960 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5961 (ent->children == NULL)) {
5962 ent->children = list;
5963 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005964 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005965 list->parent = (xmlNodePtr) ent;
5966 } else {
5967 xmlFreeNodeList(list);
5968 }
5969 } else if (list != NULL) {
5970 xmlFreeNodeList(list);
5971 }
5972 } else {
5973 /*
5974 * 4.3.2: An internal general parsed entity is well-formed
5975 * if its replacement text matches the production labeled
5976 * content.
5977 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005978
5979 void *user_data;
5980 /*
5981 * This is a bit hackish but this seems the best
5982 * way to make sure both SAX and DOM entity support
5983 * behaves okay.
5984 */
5985 if (ctxt->userData == ctxt)
5986 user_data = NULL;
5987 else
5988 user_data = ctxt->userData;
5989
Owen Taylor3473f882001-02-23 17:55:21 +00005990 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5991 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005992 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5993 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005994 ctxt->depth--;
5995 } else if (ent->etype ==
5996 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5997 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005998 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005999 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006000 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006001 ctxt->depth--;
6002 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006003 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006004 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6005 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006006 }
6007 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006008 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006009 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006010 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006011 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6012 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006013 (ent->children == NULL)) {
6014 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006015 if (ctxt->replaceEntities) {
6016 /*
6017 * Prune it directly in the generated document
6018 * except for single text nodes.
6019 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006020 if (((list->type == XML_TEXT_NODE) &&
6021 (list->next == NULL)) ||
6022 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006023 list->parent = (xmlNodePtr) ent;
6024 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006025 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006026 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006027 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006028 while (list != NULL) {
6029 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006030 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006031 if (list->next == NULL)
6032 ent->last = list;
6033 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006034 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006035 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006036#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006037 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6038 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006039#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006040 }
6041 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006042 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006043 while (list != NULL) {
6044 list->parent = (xmlNodePtr) ent;
6045 if (list->next == NULL)
6046 ent->last = list;
6047 list = list->next;
6048 }
Owen Taylor3473f882001-02-23 17:55:21 +00006049 }
6050 } else {
6051 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006052 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006053 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006054 } else if ((ret != XML_ERR_OK) &&
6055 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006056 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006057 } else if (list != NULL) {
6058 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006059 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006060 }
6061 }
6062 }
6063 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6064 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6065 /*
6066 * Create a node.
6067 */
6068 ctxt->sax->reference(ctxt->userData, ent->name);
6069 return;
6070 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00006071 /*
6072 * There is a problem on the handling of _private for entities
6073 * (bug 155816): Should we copy the content of the field from
6074 * the entity (possibly overwriting some value set by the user
6075 * when a copy is created), should we leave it alone, or should
6076 * we try to take care of different situations? The problem
6077 * is exacerbated by the usage of this field by the xmlReader.
6078 * To fix this bug, we look at _private on the created node
6079 * and, if it's NULL, we copy in whatever was in the entity.
6080 * If it's not NULL we leave it alone. This is somewhat of a
6081 * hack - maybe we should have further tests to determine
6082 * what to do.
6083 */
Owen Taylor3473f882001-02-23 17:55:21 +00006084 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6085 /*
6086 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006087 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006088 * In the first occurrence list contains the replacement.
6089 * progressive == 2 means we are operating on the Reader
6090 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006091 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006092 if (((list == NULL) && (ent->owner == 0)) ||
6093 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006094 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006095
6096 /*
6097 * when operating on a reader, the entities definitions
6098 * are always owning the entities subtree.
6099 if (ctxt->parseMode == XML_PARSE_READER)
6100 ent->owner = 1;
6101 */
6102
Daniel Veillard62f313b2001-07-04 19:49:14 +00006103 cur = ent->children;
6104 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006105 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006106 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006107 if (nw->_private == NULL)
6108 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006109 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006110 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006111 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006112 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006113 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006114 if (cur == ent->last) {
6115 /*
6116 * needed to detect some strange empty
6117 * node cases in the reader tests
6118 */
6119 if ((ctxt->parseMode == XML_PARSE_READER) &&
6120 (nw->type == XML_ELEMENT_NODE) &&
6121 (nw->children == NULL))
6122 nw->extra = 1;
6123
Daniel Veillard62f313b2001-07-04 19:49:14 +00006124 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006125 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006126 cur = cur->next;
6127 }
Daniel Veillard81273902003-09-30 00:43:48 +00006128#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006129 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006130 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006131#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006132 } else if (list == NULL) {
6133 xmlNodePtr nw = NULL, cur, next, last,
6134 firstChild = NULL;
6135 /*
6136 * Copy the entity child list and make it the new
6137 * entity child list. The goal is to make sure any
6138 * ID or REF referenced will be the one from the
6139 * document content and not the entity copy.
6140 */
6141 cur = ent->children;
6142 ent->children = NULL;
6143 last = ent->last;
6144 ent->last = NULL;
6145 while (cur != NULL) {
6146 next = cur->next;
6147 cur->next = NULL;
6148 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006149 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006150 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006151 if (nw->_private == NULL)
6152 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006153 if (firstChild == NULL){
6154 firstChild = cur;
6155 }
6156 xmlAddChild((xmlNodePtr) ent, nw);
6157 xmlAddChild(ctxt->node, cur);
6158 }
6159 if (cur == last)
6160 break;
6161 cur = next;
6162 }
6163 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006164#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006165 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6166 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006167#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006168 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006169 const xmlChar *nbktext;
6170
Daniel Veillard62f313b2001-07-04 19:49:14 +00006171 /*
6172 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006173 * node with a possible previous text one which
6174 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006175 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006176 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6177 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006178 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006179 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006180 if ((ent->last != ent->children) &&
6181 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006182 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006183 xmlAddChildList(ctxt->node, ent->children);
6184 }
6185
Owen Taylor3473f882001-02-23 17:55:21 +00006186 /*
6187 * This is to avoid a nasty side effect, see
6188 * characters() in SAX.c
6189 */
6190 ctxt->nodemem = 0;
6191 ctxt->nodelen = 0;
6192 return;
6193 } else {
6194 /*
6195 * Probably running in SAX mode
6196 */
6197 xmlParserInputPtr input;
6198
6199 input = xmlNewEntityInputStream(ctxt, ent);
6200 xmlPushInput(ctxt, input);
6201 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006202 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
6203 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006204 xmlParseTextDecl(ctxt);
6205 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6206 /*
6207 * The XML REC instructs us to stop parsing right here
6208 */
6209 ctxt->instate = XML_PARSER_EOF;
6210 return;
6211 }
6212 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006213 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
6214 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006215 }
6216 }
6217 return;
6218 }
6219 }
6220 } else {
6221 val = ent->content;
6222 if (val == NULL) return;
6223 /*
6224 * inline the entity.
6225 */
6226 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6227 (!ctxt->disableSAX))
6228 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6229 }
6230 }
6231}
6232
6233/**
6234 * xmlParseEntityRef:
6235 * @ctxt: an XML parser context
6236 *
6237 * parse ENTITY references declarations
6238 *
6239 * [68] EntityRef ::= '&' Name ';'
6240 *
6241 * [ WFC: Entity Declared ]
6242 * In a document without any DTD, a document with only an internal DTD
6243 * subset which contains no parameter entity references, or a document
6244 * with "standalone='yes'", the Name given in the entity reference
6245 * must match that in an entity declaration, except that well-formed
6246 * documents need not declare any of the following entities: amp, lt,
6247 * gt, apos, quot. The declaration of a parameter entity must precede
6248 * any reference to it. Similarly, the declaration of a general entity
6249 * must precede any reference to it which appears in a default value in an
6250 * attribute-list declaration. Note that if entities are declared in the
6251 * external subset or in external parameter entities, a non-validating
6252 * processor is not obligated to read and process their declarations;
6253 * for such documents, the rule that an entity must be declared is a
6254 * well-formedness constraint only if standalone='yes'.
6255 *
6256 * [ WFC: Parsed Entity ]
6257 * An entity reference must not contain the name of an unparsed entity
6258 *
6259 * Returns the xmlEntityPtr if found, or NULL otherwise.
6260 */
6261xmlEntityPtr
6262xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006263 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006264 xmlEntityPtr ent = NULL;
6265
6266 GROW;
6267
6268 if (RAW == '&') {
6269 NEXT;
6270 name = xmlParseName(ctxt);
6271 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006272 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6273 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006274 } else {
6275 if (RAW == ';') {
6276 NEXT;
6277 /*
6278 * Ask first SAX for entity resolution, otherwise try the
6279 * predefined set.
6280 */
6281 if (ctxt->sax != NULL) {
6282 if (ctxt->sax->getEntity != NULL)
6283 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006284 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006285 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006286 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6287 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006288 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006289 }
Owen Taylor3473f882001-02-23 17:55:21 +00006290 }
6291 /*
6292 * [ WFC: Entity Declared ]
6293 * In a document without any DTD, a document with only an
6294 * internal DTD subset which contains no parameter entity
6295 * references, or a document with "standalone='yes'", the
6296 * Name given in the entity reference must match that in an
6297 * entity declaration, except that well-formed documents
6298 * need not declare any of the following entities: amp, lt,
6299 * gt, apos, quot.
6300 * The declaration of a parameter entity must precede any
6301 * reference to it.
6302 * Similarly, the declaration of a general entity must
6303 * precede any reference to it which appears in a default
6304 * value in an attribute-list declaration. Note that if
6305 * entities are declared in the external subset or in
6306 * external parameter entities, a non-validating processor
6307 * is not obligated to read and process their declarations;
6308 * for such documents, the rule that an entity must be
6309 * declared is a well-formedness constraint only if
6310 * standalone='yes'.
6311 */
6312 if (ent == NULL) {
6313 if ((ctxt->standalone == 1) ||
6314 ((ctxt->hasExternalSubset == 0) &&
6315 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006316 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006317 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006318 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006319 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006320 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00006321 if ((ctxt->inSubset == 0) &&
6322 (ctxt->sax != NULL) &&
6323 (ctxt->sax->reference != NULL)) {
6324 ctxt->sax->reference(ctxt, name);
6325 }
Owen Taylor3473f882001-02-23 17:55:21 +00006326 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006327 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006328 }
6329
6330 /*
6331 * [ WFC: Parsed Entity ]
6332 * An entity reference must not contain the name of an
6333 * unparsed entity
6334 */
6335 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006336 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006337 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006338 }
6339
6340 /*
6341 * [ WFC: No External Entity References ]
6342 * Attribute values cannot contain direct or indirect
6343 * entity references to external entities.
6344 */
6345 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6346 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006347 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6348 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 }
6350 /*
6351 * [ WFC: No < in Attribute Values ]
6352 * The replacement text of any entity referred to directly or
6353 * indirectly in an attribute value (other than "&lt;") must
6354 * not contain a <.
6355 */
6356 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6357 (ent != NULL) &&
6358 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6359 (ent->content != NULL) &&
6360 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006361 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006362 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006363 }
6364
6365 /*
6366 * Internal check, no parameter entities here ...
6367 */
6368 else {
6369 switch (ent->etype) {
6370 case XML_INTERNAL_PARAMETER_ENTITY:
6371 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006372 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6373 "Attempt to reference the parameter entity '%s'\n",
6374 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 break;
6376 default:
6377 break;
6378 }
6379 }
6380
6381 /*
6382 * [ WFC: No Recursion ]
6383 * A parsed entity must not contain a recursive reference
6384 * to itself, either directly or indirectly.
6385 * Done somewhere else
6386 */
6387
6388 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006389 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006390 }
Owen Taylor3473f882001-02-23 17:55:21 +00006391 }
6392 }
6393 return(ent);
6394}
6395
6396/**
6397 * xmlParseStringEntityRef:
6398 * @ctxt: an XML parser context
6399 * @str: a pointer to an index in the string
6400 *
6401 * parse ENTITY references declarations, but this version parses it from
6402 * a string value.
6403 *
6404 * [68] EntityRef ::= '&' Name ';'
6405 *
6406 * [ WFC: Entity Declared ]
6407 * In a document without any DTD, a document with only an internal DTD
6408 * subset which contains no parameter entity references, or a document
6409 * with "standalone='yes'", the Name given in the entity reference
6410 * must match that in an entity declaration, except that well-formed
6411 * documents need not declare any of the following entities: amp, lt,
6412 * gt, apos, quot. The declaration of a parameter entity must precede
6413 * any reference to it. Similarly, the declaration of a general entity
6414 * must precede any reference to it which appears in a default value in an
6415 * attribute-list declaration. Note that if entities are declared in the
6416 * external subset or in external parameter entities, a non-validating
6417 * processor is not obligated to read and process their declarations;
6418 * for such documents, the rule that an entity must be declared is a
6419 * well-formedness constraint only if standalone='yes'.
6420 *
6421 * [ WFC: Parsed Entity ]
6422 * An entity reference must not contain the name of an unparsed entity
6423 *
6424 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6425 * is updated to the current location in the string.
6426 */
6427xmlEntityPtr
6428xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6429 xmlChar *name;
6430 const xmlChar *ptr;
6431 xmlChar cur;
6432 xmlEntityPtr ent = NULL;
6433
6434 if ((str == NULL) || (*str == NULL))
6435 return(NULL);
6436 ptr = *str;
6437 cur = *ptr;
6438 if (cur == '&') {
6439 ptr++;
6440 cur = *ptr;
6441 name = xmlParseStringName(ctxt, &ptr);
6442 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006443 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6444 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006445 } else {
6446 if (*ptr == ';') {
6447 ptr++;
6448 /*
6449 * Ask first SAX for entity resolution, otherwise try the
6450 * predefined set.
6451 */
6452 if (ctxt->sax != NULL) {
6453 if (ctxt->sax->getEntity != NULL)
6454 ent = ctxt->sax->getEntity(ctxt->userData, name);
6455 if (ent == NULL)
6456 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006457 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006458 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006459 }
Owen Taylor3473f882001-02-23 17:55:21 +00006460 }
6461 /*
6462 * [ WFC: Entity Declared ]
6463 * In a document without any DTD, a document with only an
6464 * internal DTD subset which contains no parameter entity
6465 * references, or a document with "standalone='yes'", the
6466 * Name given in the entity reference must match that in an
6467 * entity declaration, except that well-formed documents
6468 * need not declare any of the following entities: amp, lt,
6469 * gt, apos, quot.
6470 * The declaration of a parameter entity must precede any
6471 * reference to it.
6472 * Similarly, the declaration of a general entity must
6473 * precede any reference to it which appears in a default
6474 * value in an attribute-list declaration. Note that if
6475 * entities are declared in the external subset or in
6476 * external parameter entities, a non-validating processor
6477 * is not obligated to read and process their declarations;
6478 * for such documents, the rule that an entity must be
6479 * declared is a well-formedness constraint only if
6480 * standalone='yes'.
6481 */
6482 if (ent == NULL) {
6483 if ((ctxt->standalone == 1) ||
6484 ((ctxt->hasExternalSubset == 0) &&
6485 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006486 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006487 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006488 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006489 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006490 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006491 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006493 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006494 }
6495
6496 /*
6497 * [ WFC: Parsed Entity ]
6498 * An entity reference must not contain the name of an
6499 * unparsed entity
6500 */
6501 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006502 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006503 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006504 }
6505
6506 /*
6507 * [ WFC: No External Entity References ]
6508 * Attribute values cannot contain direct or indirect
6509 * entity references to external entities.
6510 */
6511 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6512 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006513 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006514 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006515 }
6516 /*
6517 * [ WFC: No < in Attribute Values ]
6518 * The replacement text of any entity referred to directly or
6519 * indirectly in an attribute value (other than "&lt;") must
6520 * not contain a <.
6521 */
6522 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6523 (ent != NULL) &&
6524 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6525 (ent->content != NULL) &&
6526 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006527 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6528 "'<' in entity '%s' is not allowed in attributes values\n",
6529 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006530 }
6531
6532 /*
6533 * Internal check, no parameter entities here ...
6534 */
6535 else {
6536 switch (ent->etype) {
6537 case XML_INTERNAL_PARAMETER_ENTITY:
6538 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006539 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6540 "Attempt to reference the parameter entity '%s'\n",
6541 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006542 break;
6543 default:
6544 break;
6545 }
6546 }
6547
6548 /*
6549 * [ WFC: No Recursion ]
6550 * A parsed entity must not contain a recursive reference
6551 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006552 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006553 */
6554
6555 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006556 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006557 }
6558 xmlFree(name);
6559 }
6560 }
6561 *str = ptr;
6562 return(ent);
6563}
6564
6565/**
6566 * xmlParsePEReference:
6567 * @ctxt: an XML parser context
6568 *
6569 * parse PEReference declarations
6570 * The entity content is handled directly by pushing it's content as
6571 * a new input stream.
6572 *
6573 * [69] PEReference ::= '%' Name ';'
6574 *
6575 * [ WFC: No Recursion ]
6576 * A parsed entity must not contain a recursive
6577 * reference to itself, either directly or indirectly.
6578 *
6579 * [ WFC: Entity Declared ]
6580 * In a document without any DTD, a document with only an internal DTD
6581 * subset which contains no parameter entity references, or a document
6582 * with "standalone='yes'", ... ... The declaration of a parameter
6583 * entity must precede any reference to it...
6584 *
6585 * [ VC: Entity Declared ]
6586 * In a document with an external subset or external parameter entities
6587 * with "standalone='no'", ... ... The declaration of a parameter entity
6588 * must precede any reference to it...
6589 *
6590 * [ WFC: In DTD ]
6591 * Parameter-entity references may only appear in the DTD.
6592 * NOTE: misleading but this is handled.
6593 */
6594void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006595xmlParsePEReference(xmlParserCtxtPtr ctxt)
6596{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006597 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006598 xmlEntityPtr entity = NULL;
6599 xmlParserInputPtr input;
6600
6601 if (RAW == '%') {
6602 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006603 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006604 if (name == NULL) {
6605 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6606 "xmlParsePEReference: no name\n");
6607 } else {
6608 if (RAW == ';') {
6609 NEXT;
6610 if ((ctxt->sax != NULL) &&
6611 (ctxt->sax->getParameterEntity != NULL))
6612 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6613 name);
6614 if (entity == NULL) {
6615 /*
6616 * [ WFC: Entity Declared ]
6617 * In a document without any DTD, a document with only an
6618 * internal DTD subset which contains no parameter entity
6619 * references, or a document with "standalone='yes'", ...
6620 * ... The declaration of a parameter entity must precede
6621 * any reference to it...
6622 */
6623 if ((ctxt->standalone == 1) ||
6624 ((ctxt->hasExternalSubset == 0) &&
6625 (ctxt->hasPErefs == 0))) {
6626 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6627 "PEReference: %%%s; not found\n",
6628 name);
6629 } else {
6630 /*
6631 * [ VC: Entity Declared ]
6632 * In a document with an external subset or external
6633 * parameter entities with "standalone='no'", ...
6634 * ... The declaration of a parameter entity must
6635 * precede any reference to it...
6636 */
6637 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6638 "PEReference: %%%s; not found\n",
6639 name, NULL);
6640 ctxt->valid = 0;
6641 }
6642 } else {
6643 /*
6644 * Internal checking in case the entity quest barfed
6645 */
6646 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6647 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6648 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6649 "Internal: %%%s; is not a parameter entity\n",
6650 name, NULL);
6651 } else if (ctxt->input->free != deallocblankswrapper) {
6652 input =
6653 xmlNewBlanksWrapperInputStream(ctxt, entity);
6654 xmlPushInput(ctxt, input);
6655 } else {
6656 /*
6657 * TODO !!!
6658 * handle the extra spaces added before and after
6659 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6660 */
6661 input = xmlNewEntityInputStream(ctxt, entity);
6662 xmlPushInput(ctxt, input);
6663 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006664 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006665 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006666 xmlParseTextDecl(ctxt);
6667 if (ctxt->errNo ==
6668 XML_ERR_UNSUPPORTED_ENCODING) {
6669 /*
6670 * The XML REC instructs us to stop parsing
6671 * right here
6672 */
6673 ctxt->instate = XML_PARSER_EOF;
6674 return;
6675 }
6676 }
6677 }
6678 }
6679 ctxt->hasPErefs = 1;
6680 } else {
6681 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6682 }
6683 }
Owen Taylor3473f882001-02-23 17:55:21 +00006684 }
6685}
6686
6687/**
6688 * xmlParseStringPEReference:
6689 * @ctxt: an XML parser context
6690 * @str: a pointer to an index in the string
6691 *
6692 * parse PEReference declarations
6693 *
6694 * [69] PEReference ::= '%' Name ';'
6695 *
6696 * [ WFC: No Recursion ]
6697 * A parsed entity must not contain a recursive
6698 * reference to itself, either directly or indirectly.
6699 *
6700 * [ WFC: Entity Declared ]
6701 * In a document without any DTD, a document with only an internal DTD
6702 * subset which contains no parameter entity references, or a document
6703 * with "standalone='yes'", ... ... The declaration of a parameter
6704 * entity must precede any reference to it...
6705 *
6706 * [ VC: Entity Declared ]
6707 * In a document with an external subset or external parameter entities
6708 * with "standalone='no'", ... ... The declaration of a parameter entity
6709 * must precede any reference to it...
6710 *
6711 * [ WFC: In DTD ]
6712 * Parameter-entity references may only appear in the DTD.
6713 * NOTE: misleading but this is handled.
6714 *
6715 * Returns the string of the entity content.
6716 * str is updated to the current value of the index
6717 */
6718xmlEntityPtr
6719xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6720 const xmlChar *ptr;
6721 xmlChar cur;
6722 xmlChar *name;
6723 xmlEntityPtr entity = NULL;
6724
6725 if ((str == NULL) || (*str == NULL)) return(NULL);
6726 ptr = *str;
6727 cur = *ptr;
6728 if (cur == '%') {
6729 ptr++;
6730 cur = *ptr;
6731 name = xmlParseStringName(ctxt, &ptr);
6732 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006733 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6734 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006735 } else {
6736 cur = *ptr;
6737 if (cur == ';') {
6738 ptr++;
6739 cur = *ptr;
6740 if ((ctxt->sax != NULL) &&
6741 (ctxt->sax->getParameterEntity != NULL))
6742 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6743 name);
6744 if (entity == NULL) {
6745 /*
6746 * [ WFC: Entity Declared ]
6747 * In a document without any DTD, a document with only an
6748 * internal DTD subset which contains no parameter entity
6749 * references, or a document with "standalone='yes'", ...
6750 * ... The declaration of a parameter entity must precede
6751 * any reference to it...
6752 */
6753 if ((ctxt->standalone == 1) ||
6754 ((ctxt->hasExternalSubset == 0) &&
6755 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006756 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006757 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006758 } else {
6759 /*
6760 * [ VC: Entity Declared ]
6761 * In a document with an external subset or external
6762 * parameter entities with "standalone='no'", ...
6763 * ... The declaration of a parameter entity must
6764 * precede any reference to it...
6765 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006766 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6767 "PEReference: %%%s; not found\n",
6768 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006769 ctxt->valid = 0;
6770 }
6771 } else {
6772 /*
6773 * Internal checking in case the entity quest barfed
6774 */
6775 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6776 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006777 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6778 "%%%s; is not a parameter entity\n",
6779 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006780 }
6781 }
6782 ctxt->hasPErefs = 1;
6783 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006784 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006785 }
6786 xmlFree(name);
6787 }
6788 }
6789 *str = ptr;
6790 return(entity);
6791}
6792
6793/**
6794 * xmlParseDocTypeDecl:
6795 * @ctxt: an XML parser context
6796 *
6797 * parse a DOCTYPE declaration
6798 *
6799 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6800 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6801 *
6802 * [ VC: Root Element Type ]
6803 * The Name in the document type declaration must match the element
6804 * type of the root element.
6805 */
6806
6807void
6808xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006809 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006810 xmlChar *ExternalID = NULL;
6811 xmlChar *URI = NULL;
6812
6813 /*
6814 * We know that '<!DOCTYPE' has been detected.
6815 */
6816 SKIP(9);
6817
6818 SKIP_BLANKS;
6819
6820 /*
6821 * Parse the DOCTYPE name.
6822 */
6823 name = xmlParseName(ctxt);
6824 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006825 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6826 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006827 }
6828 ctxt->intSubName = name;
6829
6830 SKIP_BLANKS;
6831
6832 /*
6833 * Check for SystemID and ExternalID
6834 */
6835 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6836
6837 if ((URI != NULL) || (ExternalID != NULL)) {
6838 ctxt->hasExternalSubset = 1;
6839 }
6840 ctxt->extSubURI = URI;
6841 ctxt->extSubSystem = ExternalID;
6842
6843 SKIP_BLANKS;
6844
6845 /*
6846 * Create and update the internal subset.
6847 */
6848 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6849 (!ctxt->disableSAX))
6850 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6851
6852 /*
6853 * Is there any internal subset declarations ?
6854 * they are handled separately in xmlParseInternalSubset()
6855 */
6856 if (RAW == '[')
6857 return;
6858
6859 /*
6860 * We should be at the end of the DOCTYPE declaration.
6861 */
6862 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006863 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006864 }
6865 NEXT;
6866}
6867
6868/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006869 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006870 * @ctxt: an XML parser context
6871 *
6872 * parse the internal subset declaration
6873 *
6874 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6875 */
6876
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006877static void
Owen Taylor3473f882001-02-23 17:55:21 +00006878xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6879 /*
6880 * Is there any DTD definition ?
6881 */
6882 if (RAW == '[') {
6883 ctxt->instate = XML_PARSER_DTD;
6884 NEXT;
6885 /*
6886 * Parse the succession of Markup declarations and
6887 * PEReferences.
6888 * Subsequence (markupdecl | PEReference | S)*
6889 */
6890 while (RAW != ']') {
6891 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006892 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006893
6894 SKIP_BLANKS;
6895 xmlParseMarkupDecl(ctxt);
6896 xmlParsePEReference(ctxt);
6897
6898 /*
6899 * Pop-up of finished entities.
6900 */
6901 while ((RAW == 0) && (ctxt->inputNr > 1))
6902 xmlPopInput(ctxt);
6903
6904 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006905 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006906 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006907 break;
6908 }
6909 }
6910 if (RAW == ']') {
6911 NEXT;
6912 SKIP_BLANKS;
6913 }
6914 }
6915
6916 /*
6917 * We should be at the end of the DOCTYPE declaration.
6918 */
6919 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006920 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006921 }
6922 NEXT;
6923}
6924
Daniel Veillard81273902003-09-30 00:43:48 +00006925#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006926/**
6927 * xmlParseAttribute:
6928 * @ctxt: an XML parser context
6929 * @value: a xmlChar ** used to store the value of the attribute
6930 *
6931 * parse an attribute
6932 *
6933 * [41] Attribute ::= Name Eq AttValue
6934 *
6935 * [ WFC: No External Entity References ]
6936 * Attribute values cannot contain direct or indirect entity references
6937 * to external entities.
6938 *
6939 * [ WFC: No < in Attribute Values ]
6940 * The replacement text of any entity referred to directly or indirectly in
6941 * an attribute value (other than "&lt;") must not contain a <.
6942 *
6943 * [ VC: Attribute Value Type ]
6944 * The attribute must have been declared; the value must be of the type
6945 * declared for it.
6946 *
6947 * [25] Eq ::= S? '=' S?
6948 *
6949 * With namespace:
6950 *
6951 * [NS 11] Attribute ::= QName Eq AttValue
6952 *
6953 * Also the case QName == xmlns:??? is handled independently as a namespace
6954 * definition.
6955 *
6956 * Returns the attribute name, and the value in *value.
6957 */
6958
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006959const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006960xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006961 const xmlChar *name;
6962 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006963
6964 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006965 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006966 name = xmlParseName(ctxt);
6967 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006968 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006969 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006970 return(NULL);
6971 }
6972
6973 /*
6974 * read the value
6975 */
6976 SKIP_BLANKS;
6977 if (RAW == '=') {
6978 NEXT;
6979 SKIP_BLANKS;
6980 val = xmlParseAttValue(ctxt);
6981 ctxt->instate = XML_PARSER_CONTENT;
6982 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006983 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006984 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006985 return(NULL);
6986 }
6987
6988 /*
6989 * Check that xml:lang conforms to the specification
6990 * No more registered as an error, just generate a warning now
6991 * since this was deprecated in XML second edition
6992 */
6993 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6994 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006995 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6996 "Malformed value for xml:lang : %s\n",
6997 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006998 }
6999 }
7000
7001 /*
7002 * Check that xml:space conforms to the specification
7003 */
7004 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7005 if (xmlStrEqual(val, BAD_CAST "default"))
7006 *(ctxt->space) = 0;
7007 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7008 *(ctxt->space) = 1;
7009 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007010 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007011"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007012 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007013 }
7014 }
7015
7016 *value = val;
7017 return(name);
7018}
7019
7020/**
7021 * xmlParseStartTag:
7022 * @ctxt: an XML parser context
7023 *
7024 * parse a start of tag either for rule element or
7025 * EmptyElement. In both case we don't parse the tag closing chars.
7026 *
7027 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7028 *
7029 * [ WFC: Unique Att Spec ]
7030 * No attribute name may appear more than once in the same start-tag or
7031 * empty-element tag.
7032 *
7033 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7034 *
7035 * [ WFC: Unique Att Spec ]
7036 * No attribute name may appear more than once in the same start-tag or
7037 * empty-element tag.
7038 *
7039 * With namespace:
7040 *
7041 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7042 *
7043 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7044 *
7045 * Returns the element name parsed
7046 */
7047
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007048const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007049xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007050 const xmlChar *name;
7051 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007052 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007053 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007054 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007055 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007056 int i;
7057
7058 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007059 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007060
7061 name = xmlParseName(ctxt);
7062 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007063 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007064 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007065 return(NULL);
7066 }
7067
7068 /*
7069 * Now parse the attributes, it ends up with the ending
7070 *
7071 * (S Attribute)* S?
7072 */
7073 SKIP_BLANKS;
7074 GROW;
7075
Daniel Veillard21a0f912001-02-25 19:54:14 +00007076 while ((RAW != '>') &&
7077 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007078 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007079 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007080 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007081
7082 attname = xmlParseAttribute(ctxt, &attvalue);
7083 if ((attname != NULL) && (attvalue != NULL)) {
7084 /*
7085 * [ WFC: Unique Att Spec ]
7086 * No attribute name may appear more than once in the same
7087 * start-tag or empty-element tag.
7088 */
7089 for (i = 0; i < nbatts;i += 2) {
7090 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007091 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007092 xmlFree(attvalue);
7093 goto failed;
7094 }
7095 }
Owen Taylor3473f882001-02-23 17:55:21 +00007096 /*
7097 * Add the pair to atts
7098 */
7099 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007100 maxatts = 22; /* allow for 10 attrs by default */
7101 atts = (const xmlChar **)
7102 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007103 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007104 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007105 if (attvalue != NULL)
7106 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007107 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007108 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007109 ctxt->atts = atts;
7110 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007111 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007112 const xmlChar **n;
7113
Owen Taylor3473f882001-02-23 17:55:21 +00007114 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007115 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007116 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007117 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007118 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007119 if (attvalue != NULL)
7120 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007121 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007122 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007123 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007124 ctxt->atts = atts;
7125 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007126 }
7127 atts[nbatts++] = attname;
7128 atts[nbatts++] = attvalue;
7129 atts[nbatts] = NULL;
7130 atts[nbatts + 1] = NULL;
7131 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007132 if (attvalue != NULL)
7133 xmlFree(attvalue);
7134 }
7135
7136failed:
7137
Daniel Veillard3772de32002-12-17 10:31:45 +00007138 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007139 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7140 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007141 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007142 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7143 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007144 }
7145 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007146 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7147 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007148 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7149 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007150 break;
7151 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007152 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007153 GROW;
7154 }
7155
7156 /*
7157 * SAX: Start of Element !
7158 */
7159 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007160 (!ctxt->disableSAX)) {
7161 if (nbatts > 0)
7162 ctxt->sax->startElement(ctxt->userData, name, atts);
7163 else
7164 ctxt->sax->startElement(ctxt->userData, name, NULL);
7165 }
Owen Taylor3473f882001-02-23 17:55:21 +00007166
7167 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007168 /* Free only the content strings */
7169 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007170 if (atts[i] != NULL)
7171 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007172 }
7173 return(name);
7174}
7175
7176/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007177 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007178 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007179 * @line: line of the start tag
7180 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007181 *
7182 * parse an end of tag
7183 *
7184 * [42] ETag ::= '</' Name S? '>'
7185 *
7186 * With namespace
7187 *
7188 * [NS 9] ETag ::= '</' QName S? '>'
7189 */
7190
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007191static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007192xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007193 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007194
7195 GROW;
7196 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007197 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007198 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007199 return;
7200 }
7201 SKIP(2);
7202
Daniel Veillard46de64e2002-05-29 08:21:33 +00007203 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007204
7205 /*
7206 * We should definitely be at the ending "S? '>'" part
7207 */
7208 GROW;
7209 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007210 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007211 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007212 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00007213 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007214
7215 /*
7216 * [ WFC: Element Type Match ]
7217 * The Name in an element's end-tag must match the element type in the
7218 * start-tag.
7219 *
7220 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00007221 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007222 if (name == NULL) name = BAD_CAST "unparseable";
7223 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007224 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007225 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007226 }
7227
7228 /*
7229 * SAX: End of Tag
7230 */
7231 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
7232 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00007233 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007234
Daniel Veillarde57ec792003-09-10 10:50:59 +00007235 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007236 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007237 return;
7238}
7239
7240/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007241 * xmlParseEndTag:
7242 * @ctxt: an XML parser context
7243 *
7244 * parse an end of tag
7245 *
7246 * [42] ETag ::= '</' Name S? '>'
7247 *
7248 * With namespace
7249 *
7250 * [NS 9] ETag ::= '</' QName S? '>'
7251 */
7252
7253void
7254xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007255 xmlParseEndTag1(ctxt, 0);
7256}
Daniel Veillard81273902003-09-30 00:43:48 +00007257#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007258
7259/************************************************************************
7260 * *
7261 * SAX 2 specific operations *
7262 * *
7263 ************************************************************************/
7264
7265static const xmlChar *
7266xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7267 int len = 0, l;
7268 int c;
7269 int count = 0;
7270
7271 /*
7272 * Handler for more complex cases
7273 */
7274 GROW;
7275 c = CUR_CHAR(l);
7276 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007277 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007278 return(NULL);
7279 }
7280
7281 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007282 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007283 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007284 (IS_COMBINING(c)) ||
7285 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007286 if (count++ > 100) {
7287 count = 0;
7288 GROW;
7289 }
7290 len += l;
7291 NEXTL(l);
7292 c = CUR_CHAR(l);
7293 }
7294 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7295}
7296
7297/*
7298 * xmlGetNamespace:
7299 * @ctxt: an XML parser context
7300 * @prefix: the prefix to lookup
7301 *
7302 * Lookup the namespace name for the @prefix (which ca be NULL)
7303 * The prefix must come from the @ctxt->dict dictionnary
7304 *
7305 * Returns the namespace name or NULL if not bound
7306 */
7307static const xmlChar *
7308xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7309 int i;
7310
Daniel Veillarde57ec792003-09-10 10:50:59 +00007311 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007312 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007313 if (ctxt->nsTab[i] == prefix) {
7314 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7315 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007316 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007317 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007318 return(NULL);
7319}
7320
7321/**
7322 * xmlParseNCName:
7323 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007324 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007325 *
7326 * parse an XML name.
7327 *
7328 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7329 * CombiningChar | Extender
7330 *
7331 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7332 *
7333 * Returns the Name parsed or NULL
7334 */
7335
7336static const xmlChar *
7337xmlParseNCName(xmlParserCtxtPtr ctxt) {
7338 const xmlChar *in;
7339 const xmlChar *ret;
7340 int count = 0;
7341
7342 /*
7343 * Accelerator for simple ASCII names
7344 */
7345 in = ctxt->input->cur;
7346 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7347 ((*in >= 0x41) && (*in <= 0x5A)) ||
7348 (*in == '_')) {
7349 in++;
7350 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7351 ((*in >= 0x41) && (*in <= 0x5A)) ||
7352 ((*in >= 0x30) && (*in <= 0x39)) ||
7353 (*in == '_') || (*in == '-') ||
7354 (*in == '.'))
7355 in++;
7356 if ((*in > 0) && (*in < 0x80)) {
7357 count = in - ctxt->input->cur;
7358 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7359 ctxt->input->cur = in;
7360 ctxt->nbChars += count;
7361 ctxt->input->col += count;
7362 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007363 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007364 }
7365 return(ret);
7366 }
7367 }
7368 return(xmlParseNCNameComplex(ctxt));
7369}
7370
7371/**
7372 * xmlParseQName:
7373 * @ctxt: an XML parser context
7374 * @prefix: pointer to store the prefix part
7375 *
7376 * parse an XML Namespace QName
7377 *
7378 * [6] QName ::= (Prefix ':')? LocalPart
7379 * [7] Prefix ::= NCName
7380 * [8] LocalPart ::= NCName
7381 *
7382 * Returns the Name parsed or NULL
7383 */
7384
7385static const xmlChar *
7386xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7387 const xmlChar *l, *p;
7388
7389 GROW;
7390
7391 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007392 if (l == NULL) {
7393 if (CUR == ':') {
7394 l = xmlParseName(ctxt);
7395 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007396 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7397 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007398 *prefix = NULL;
7399 return(l);
7400 }
7401 }
7402 return(NULL);
7403 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007404 if (CUR == ':') {
7405 NEXT;
7406 p = l;
7407 l = xmlParseNCName(ctxt);
7408 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007409 xmlChar *tmp;
7410
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007411 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7412 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007413 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7414 p = xmlDictLookup(ctxt->dict, tmp, -1);
7415 if (tmp != NULL) xmlFree(tmp);
7416 *prefix = NULL;
7417 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007418 }
7419 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007420 xmlChar *tmp;
7421
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007422 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7423 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007424 NEXT;
7425 tmp = (xmlChar *) xmlParseName(ctxt);
7426 if (tmp != NULL) {
7427 tmp = xmlBuildQName(tmp, l, NULL, 0);
7428 l = xmlDictLookup(ctxt->dict, tmp, -1);
7429 if (tmp != NULL) xmlFree(tmp);
7430 *prefix = p;
7431 return(l);
7432 }
7433 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7434 l = xmlDictLookup(ctxt->dict, tmp, -1);
7435 if (tmp != NULL) xmlFree(tmp);
7436 *prefix = p;
7437 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007438 }
7439 *prefix = p;
7440 } else
7441 *prefix = NULL;
7442 return(l);
7443}
7444
7445/**
7446 * xmlParseQNameAndCompare:
7447 * @ctxt: an XML parser context
7448 * @name: the localname
7449 * @prefix: the prefix, if any.
7450 *
7451 * parse an XML name and compares for match
7452 * (specialized for endtag parsing)
7453 *
7454 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7455 * and the name for mismatch
7456 */
7457
7458static const xmlChar *
7459xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7460 xmlChar const *prefix) {
7461 const xmlChar *cmp = name;
7462 const xmlChar *in;
7463 const xmlChar *ret;
7464 const xmlChar *prefix2;
7465
7466 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7467
7468 GROW;
7469 in = ctxt->input->cur;
7470
7471 cmp = prefix;
7472 while (*in != 0 && *in == *cmp) {
7473 ++in;
7474 ++cmp;
7475 }
7476 if ((*cmp == 0) && (*in == ':')) {
7477 in++;
7478 cmp = name;
7479 while (*in != 0 && *in == *cmp) {
7480 ++in;
7481 ++cmp;
7482 }
William M. Brack76e95df2003-10-18 16:20:14 +00007483 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007484 /* success */
7485 ctxt->input->cur = in;
7486 return((const xmlChar*) 1);
7487 }
7488 }
7489 /*
7490 * all strings coms from the dictionary, equality can be done directly
7491 */
7492 ret = xmlParseQName (ctxt, &prefix2);
7493 if ((ret == name) && (prefix == prefix2))
7494 return((const xmlChar*) 1);
7495 return ret;
7496}
7497
7498/**
7499 * xmlParseAttValueInternal:
7500 * @ctxt: an XML parser context
7501 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007502 * @alloc: whether the attribute was reallocated as a new string
7503 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007504 *
7505 * parse a value for an attribute.
7506 * NOTE: if no normalization is needed, the routine will return pointers
7507 * directly from the data buffer.
7508 *
7509 * 3.3.3 Attribute-Value Normalization:
7510 * Before the value of an attribute is passed to the application or
7511 * checked for validity, the XML processor must normalize it as follows:
7512 * - a character reference is processed by appending the referenced
7513 * character to the attribute value
7514 * - an entity reference is processed by recursively processing the
7515 * replacement text of the entity
7516 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7517 * appending #x20 to the normalized value, except that only a single
7518 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7519 * parsed entity or the literal entity value of an internal parsed entity
7520 * - other characters are processed by appending them to the normalized value
7521 * If the declared value is not CDATA, then the XML processor must further
7522 * process the normalized attribute value by discarding any leading and
7523 * trailing space (#x20) characters, and by replacing sequences of space
7524 * (#x20) characters by a single space (#x20) character.
7525 * All attributes for which no declaration has been read should be treated
7526 * by a non-validating parser as if declared CDATA.
7527 *
7528 * Returns the AttValue parsed or NULL. The value has to be freed by the
7529 * caller if it was copied, this can be detected by val[*len] == 0.
7530 */
7531
7532static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007533xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7534 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007535{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007536 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007537 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007538 xmlChar *ret = NULL;
7539
7540 GROW;
7541 in = (xmlChar *) CUR_PTR;
7542 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007543 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007544 return (NULL);
7545 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007546 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007547
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007548 /*
7549 * try to handle in this routine the most common case where no
7550 * allocation of a new string is required and where content is
7551 * pure ASCII.
7552 */
7553 limit = *in++;
7554 end = ctxt->input->end;
7555 start = in;
7556 if (in >= end) {
7557 const xmlChar *oldbase = ctxt->input->base;
7558 GROW;
7559 if (oldbase != ctxt->input->base) {
7560 long delta = ctxt->input->base - oldbase;
7561 start = start + delta;
7562 in = in + delta;
7563 }
7564 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007565 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007566 if (normalize) {
7567 /*
7568 * Skip any leading spaces
7569 */
7570 while ((in < end) && (*in != limit) &&
7571 ((*in == 0x20) || (*in == 0x9) ||
7572 (*in == 0xA) || (*in == 0xD))) {
7573 in++;
7574 start = in;
7575 if (in >= end) {
7576 const xmlChar *oldbase = ctxt->input->base;
7577 GROW;
7578 if (oldbase != ctxt->input->base) {
7579 long delta = ctxt->input->base - oldbase;
7580 start = start + delta;
7581 in = in + delta;
7582 }
7583 end = ctxt->input->end;
7584 }
7585 }
7586 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7587 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7588 if ((*in++ == 0x20) && (*in == 0x20)) break;
7589 if (in >= end) {
7590 const xmlChar *oldbase = ctxt->input->base;
7591 GROW;
7592 if (oldbase != ctxt->input->base) {
7593 long delta = ctxt->input->base - oldbase;
7594 start = start + delta;
7595 in = in + delta;
7596 }
7597 end = ctxt->input->end;
7598 }
7599 }
7600 last = in;
7601 /*
7602 * skip the trailing blanks
7603 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007604 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007605 while ((in < end) && (*in != limit) &&
7606 ((*in == 0x20) || (*in == 0x9) ||
7607 (*in == 0xA) || (*in == 0xD))) {
7608 in++;
7609 if (in >= end) {
7610 const xmlChar *oldbase = ctxt->input->base;
7611 GROW;
7612 if (oldbase != ctxt->input->base) {
7613 long delta = ctxt->input->base - oldbase;
7614 start = start + delta;
7615 in = in + delta;
7616 last = last + delta;
7617 }
7618 end = ctxt->input->end;
7619 }
7620 }
7621 if (*in != limit) goto need_complex;
7622 } else {
7623 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7624 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7625 in++;
7626 if (in >= end) {
7627 const xmlChar *oldbase = ctxt->input->base;
7628 GROW;
7629 if (oldbase != ctxt->input->base) {
7630 long delta = ctxt->input->base - oldbase;
7631 start = start + delta;
7632 in = in + delta;
7633 }
7634 end = ctxt->input->end;
7635 }
7636 }
7637 last = in;
7638 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007639 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007640 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007641 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007642 *len = last - start;
7643 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007645 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007646 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007647 }
7648 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007649 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007650 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007651need_complex:
7652 if (alloc) *alloc = 1;
7653 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007654}
7655
7656/**
7657 * xmlParseAttribute2:
7658 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007659 * @pref: the element prefix
7660 * @elem: the element name
7661 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007662 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007663 * @len: an int * to save the length of the attribute
7664 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007665 *
7666 * parse an attribute in the new SAX2 framework.
7667 *
7668 * Returns the attribute name, and the value in *value, .
7669 */
7670
7671static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007672xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7673 const xmlChar *pref, const xmlChar *elem,
7674 const xmlChar **prefix, xmlChar **value,
7675 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007676 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00007677 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007678 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007679
7680 *value = NULL;
7681 GROW;
7682 name = xmlParseQName(ctxt, prefix);
7683 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007684 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7685 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007686 return(NULL);
7687 }
7688
7689 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007690 * get the type if needed
7691 */
7692 if (ctxt->attsSpecial != NULL) {
7693 int type;
7694
7695 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7696 pref, elem, *prefix, name);
7697 if (type != 0) normalize = 1;
7698 }
7699
7700 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 * read the value
7702 */
7703 SKIP_BLANKS;
7704 if (RAW == '=') {
7705 NEXT;
7706 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007707 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 ctxt->instate = XML_PARSER_CONTENT;
7709 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007710 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007711 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007712 return(NULL);
7713 }
7714
Daniel Veillardd8925572005-06-08 22:34:55 +00007715 if (*prefix == ctxt->str_xml) {
7716 /*
7717 * Check that xml:lang conforms to the specification
7718 * No more registered as an error, just generate a warning now
7719 * since this was deprecated in XML second edition
7720 */
7721 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
7722 internal_val = xmlStrndup(val, *len);
7723 if (!xmlCheckLanguageID(internal_val)) {
7724 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7725 "Malformed value for xml:lang : %s\n",
7726 internal_val, NULL);
7727 }
7728 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007729
Daniel Veillardd8925572005-06-08 22:34:55 +00007730 /*
7731 * Check that xml:space conforms to the specification
7732 */
7733 if (xmlStrEqual(name, BAD_CAST "space")) {
7734 internal_val = xmlStrndup(val, *len);
7735 if (xmlStrEqual(internal_val, BAD_CAST "default"))
7736 *(ctxt->space) = 0;
7737 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
7738 *(ctxt->space) = 1;
7739 else {
7740 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007741"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007742 internal_val, NULL);
7743 }
7744 }
7745 if (internal_val) {
7746 xmlFree(internal_val);
7747 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007748 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007749
7750 *value = val;
7751 return(name);
7752}
7753
7754/**
7755 * xmlParseStartTag2:
7756 * @ctxt: an XML parser context
7757 *
7758 * parse a start of tag either for rule element or
7759 * EmptyElement. In both case we don't parse the tag closing chars.
7760 * This routine is called when running SAX2 parsing
7761 *
7762 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7763 *
7764 * [ WFC: Unique Att Spec ]
7765 * No attribute name may appear more than once in the same start-tag or
7766 * empty-element tag.
7767 *
7768 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7769 *
7770 * [ WFC: Unique Att Spec ]
7771 * No attribute name may appear more than once in the same start-tag or
7772 * empty-element tag.
7773 *
7774 * With namespace:
7775 *
7776 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7777 *
7778 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7779 *
7780 * Returns the element name parsed
7781 */
7782
7783static const xmlChar *
7784xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007785 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007786 const xmlChar *localname;
7787 const xmlChar *prefix;
7788 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007789 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007790 const xmlChar *nsname;
7791 xmlChar *attvalue;
7792 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007793 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007794 int nratts, nbatts, nbdef;
7795 int i, j, nbNs, attval;
7796 const xmlChar *base;
7797 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00007798 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007799
7800 if (RAW != '<') return(NULL);
7801 NEXT1;
7802
7803 /*
7804 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7805 * point since the attribute values may be stored as pointers to
7806 * the buffer and calling SHRINK would destroy them !
7807 * The Shrinking is only possible once the full set of attribute
7808 * callbacks have been done.
7809 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007810reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007811 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007812 base = ctxt->input->base;
7813 cur = ctxt->input->cur - ctxt->input->base;
7814 nbatts = 0;
7815 nratts = 0;
7816 nbdef = 0;
7817 nbNs = 0;
7818 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00007819 /* Forget any namespaces added during an earlier parse of this element. */
7820 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007821
7822 localname = xmlParseQName(ctxt, &prefix);
7823 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007824 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7825 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007826 return(NULL);
7827 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007828 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007829
7830 /*
7831 * Now parse the attributes, it ends up with the ending
7832 *
7833 * (S Attribute)* S?
7834 */
7835 SKIP_BLANKS;
7836 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007837 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007838
7839 while ((RAW != '>') &&
7840 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007841 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007842 const xmlChar *q = CUR_PTR;
7843 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007844 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007845
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007846 attname = xmlParseAttribute2(ctxt, prefix, localname,
7847 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007848 if ((attname != NULL) && (attvalue != NULL)) {
7849 if (len < 0) len = xmlStrlen(attvalue);
7850 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007851 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7852 xmlURIPtr uri;
7853
7854 if (*URL != 0) {
7855 uri = xmlParseURI((const char *) URL);
7856 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007857 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7858 "xmlns: %s not a valid URI\n",
7859 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007860 } else {
7861 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007862 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7863 "xmlns: URI %s is not absolute\n",
7864 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007865 }
7866 xmlFreeURI(uri);
7867 }
7868 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007869 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007870 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007871 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007872 for (j = 1;j <= nbNs;j++)
7873 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7874 break;
7875 if (j <= nbNs)
7876 xmlErrAttributeDup(ctxt, NULL, attname);
7877 else
7878 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007879 if (alloc != 0) xmlFree(attvalue);
7880 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007881 continue;
7882 }
7883 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007884 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7885 xmlURIPtr uri;
7886
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007887 if (attname == ctxt->str_xml) {
7888 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007889 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7890 "xml namespace prefix mapped to wrong URI\n",
7891 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007892 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007893 /*
7894 * Do not keep a namespace definition node
7895 */
7896 if (alloc != 0) xmlFree(attvalue);
7897 SKIP_BLANKS;
7898 continue;
7899 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007900 uri = xmlParseURI((const char *) URL);
7901 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007902 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7903 "xmlns:%s: '%s' is not a valid URI\n",
7904 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007905 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007906 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007907 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7908 "xmlns:%s: URI %s is not absolute\n",
7909 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007910 }
7911 xmlFreeURI(uri);
7912 }
7913
Daniel Veillard0fb18932003-09-07 09:14:37 +00007914 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007915 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007916 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007917 for (j = 1;j <= nbNs;j++)
7918 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7919 break;
7920 if (j <= nbNs)
7921 xmlErrAttributeDup(ctxt, aprefix, attname);
7922 else
7923 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007924 if (alloc != 0) xmlFree(attvalue);
7925 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007926 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007927 continue;
7928 }
7929
7930 /*
7931 * Add the pair to atts
7932 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007933 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7934 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007935 if (attvalue[len] == 0)
7936 xmlFree(attvalue);
7937 goto failed;
7938 }
7939 maxatts = ctxt->maxatts;
7940 atts = ctxt->atts;
7941 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007942 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007943 atts[nbatts++] = attname;
7944 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007945 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007946 atts[nbatts++] = attvalue;
7947 attvalue += len;
7948 atts[nbatts++] = attvalue;
7949 /*
7950 * tag if some deallocation is needed
7951 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007952 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007953 } else {
7954 if ((attvalue != NULL) && (attvalue[len] == 0))
7955 xmlFree(attvalue);
7956 }
7957
7958failed:
7959
7960 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007961 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007962 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7963 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007964 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007965 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7966 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007967 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007968 }
7969 SKIP_BLANKS;
7970 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7971 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007972 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007973 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007974 break;
7975 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007976 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007977 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007978 }
7979
Daniel Veillard0fb18932003-09-07 09:14:37 +00007980 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007981 * The attributes defaulting
7982 */
7983 if (ctxt->attsDefault != NULL) {
7984 xmlDefAttrsPtr defaults;
7985
7986 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7987 if (defaults != NULL) {
7988 for (i = 0;i < defaults->nbAttrs;i++) {
7989 attname = defaults->values[4 * i];
7990 aprefix = defaults->values[4 * i + 1];
7991
7992 /*
7993 * special work for namespaces defaulted defs
7994 */
7995 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7996 /*
7997 * check that it's not a defined namespace
7998 */
7999 for (j = 1;j <= nbNs;j++)
8000 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8001 break;
8002 if (j <= nbNs) continue;
8003
8004 nsname = xmlGetNamespace(ctxt, NULL);
8005 if (nsname != defaults->values[4 * i + 2]) {
8006 if (nsPush(ctxt, NULL,
8007 defaults->values[4 * i + 2]) > 0)
8008 nbNs++;
8009 }
8010 } else if (aprefix == ctxt->str_xmlns) {
8011 /*
8012 * check that it's not a defined namespace
8013 */
8014 for (j = 1;j <= nbNs;j++)
8015 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8016 break;
8017 if (j <= nbNs) continue;
8018
8019 nsname = xmlGetNamespace(ctxt, attname);
8020 if (nsname != defaults->values[2]) {
8021 if (nsPush(ctxt, attname,
8022 defaults->values[4 * i + 2]) > 0)
8023 nbNs++;
8024 }
8025 } else {
8026 /*
8027 * check that it's not a defined attribute
8028 */
8029 for (j = 0;j < nbatts;j+=5) {
8030 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8031 break;
8032 }
8033 if (j < nbatts) continue;
8034
8035 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8036 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008037 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008038 }
8039 maxatts = ctxt->maxatts;
8040 atts = ctxt->atts;
8041 }
8042 atts[nbatts++] = attname;
8043 atts[nbatts++] = aprefix;
8044 if (aprefix == NULL)
8045 atts[nbatts++] = NULL;
8046 else
8047 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
8048 atts[nbatts++] = defaults->values[4 * i + 2];
8049 atts[nbatts++] = defaults->values[4 * i + 3];
8050 nbdef++;
8051 }
8052 }
8053 }
8054 }
8055
Daniel Veillarde70c8772003-11-25 07:21:18 +00008056 /*
8057 * The attributes checkings
8058 */
8059 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008060 /*
8061 * The default namespace does not apply to attribute names.
8062 */
8063 if (atts[i + 1] != NULL) {
8064 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8065 if (nsname == NULL) {
8066 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8067 "Namespace prefix %s for %s on %s is not defined\n",
8068 atts[i + 1], atts[i], localname);
8069 }
8070 atts[i + 2] = nsname;
8071 } else
8072 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008073 /*
8074 * [ WFC: Unique Att Spec ]
8075 * No attribute name may appear more than once in the same
8076 * start-tag or empty-element tag.
8077 * As extended by the Namespace in XML REC.
8078 */
8079 for (j = 0; j < i;j += 5) {
8080 if (atts[i] == atts[j]) {
8081 if (atts[i+1] == atts[j+1]) {
8082 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8083 break;
8084 }
8085 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8086 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8087 "Namespaced Attribute %s in '%s' redefined\n",
8088 atts[i], nsname, NULL);
8089 break;
8090 }
8091 }
8092 }
8093 }
8094
Daniel Veillarde57ec792003-09-10 10:50:59 +00008095 nsname = xmlGetNamespace(ctxt, prefix);
8096 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008097 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8098 "Namespace prefix %s on %s is not defined\n",
8099 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008100 }
8101 *pref = prefix;
8102 *URI = nsname;
8103
8104 /*
8105 * SAX: Start of Element !
8106 */
8107 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8108 (!ctxt->disableSAX)) {
8109 if (nbNs > 0)
8110 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8111 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8112 nbatts / 5, nbdef, atts);
8113 else
8114 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8115 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8116 }
8117
8118 /*
8119 * Free up attribute allocated strings if needed
8120 */
8121 if (attval != 0) {
8122 for (i = 3,j = 0; j < nratts;i += 5,j++)
8123 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8124 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008125 }
8126
8127 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008128
8129base_changed:
8130 /*
8131 * the attribute strings are valid iff the base didn't change
8132 */
8133 if (attval != 0) {
8134 for (i = 3,j = 0; j < nratts;i += 5,j++)
8135 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8136 xmlFree((xmlChar *) atts[i]);
8137 }
8138 ctxt->input->cur = ctxt->input->base + cur;
8139 if (ctxt->wellFormed == 1) {
8140 goto reparse;
8141 }
8142 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008143}
8144
8145/**
8146 * xmlParseEndTag2:
8147 * @ctxt: an XML parser context
8148 * @line: line of the start tag
8149 * @nsNr: number of namespaces on the start tag
8150 *
8151 * parse an end of tag
8152 *
8153 * [42] ETag ::= '</' Name S? '>'
8154 *
8155 * With namespace
8156 *
8157 * [NS 9] ETag ::= '</' QName S? '>'
8158 */
8159
8160static void
8161xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008162 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008163 const xmlChar *name;
8164
8165 GROW;
8166 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008167 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008168 return;
8169 }
8170 SKIP(2);
8171
William M. Brack13dfa872004-09-18 04:52:08 +00008172 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008173 if (ctxt->input->cur[tlen] == '>') {
8174 ctxt->input->cur += tlen + 1;
8175 goto done;
8176 }
8177 ctxt->input->cur += tlen;
8178 name = (xmlChar*)1;
8179 } else {
8180 if (prefix == NULL)
8181 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8182 else
8183 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8184 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008185
8186 /*
8187 * We should definitely be at the ending "S? '>'" part
8188 */
8189 GROW;
8190 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008191 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008192 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008193 } else
8194 NEXT1;
8195
8196 /*
8197 * [ WFC: Element Type Match ]
8198 * The Name in an element's end-tag must match the element type in the
8199 * start-tag.
8200 *
8201 */
8202 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008203 if (name == NULL) name = BAD_CAST "unparseable";
8204 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008206 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 }
8208
8209 /*
8210 * SAX: End of Tag
8211 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008212done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008213 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8214 (!ctxt->disableSAX))
8215 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
8216
Daniel Veillard0fb18932003-09-07 09:14:37 +00008217 spacePop(ctxt);
8218 if (nsNr != 0)
8219 nsPop(ctxt, nsNr);
8220 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008221}
8222
8223/**
Owen Taylor3473f882001-02-23 17:55:21 +00008224 * xmlParseCDSect:
8225 * @ctxt: an XML parser context
8226 *
8227 * Parse escaped pure raw content.
8228 *
8229 * [18] CDSect ::= CDStart CData CDEnd
8230 *
8231 * [19] CDStart ::= '<![CDATA['
8232 *
8233 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8234 *
8235 * [21] CDEnd ::= ']]>'
8236 */
8237void
8238xmlParseCDSect(xmlParserCtxtPtr ctxt) {
8239 xmlChar *buf = NULL;
8240 int len = 0;
8241 int size = XML_PARSER_BUFFER_SIZE;
8242 int r, rl;
8243 int s, sl;
8244 int cur, l;
8245 int count = 0;
8246
Daniel Veillard8f597c32003-10-06 08:19:27 +00008247 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008248 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008249 SKIP(9);
8250 } else
8251 return;
8252
8253 ctxt->instate = XML_PARSER_CDATA_SECTION;
8254 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00008255 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008256 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008257 ctxt->instate = XML_PARSER_CONTENT;
8258 return;
8259 }
8260 NEXTL(rl);
8261 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00008262 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008263 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008264 ctxt->instate = XML_PARSER_CONTENT;
8265 return;
8266 }
8267 NEXTL(sl);
8268 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008269 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008270 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008271 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008272 return;
8273 }
William M. Brack871611b2003-10-18 04:53:14 +00008274 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008275 ((r != ']') || (s != ']') || (cur != '>'))) {
8276 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008277 xmlChar *tmp;
8278
Owen Taylor3473f882001-02-23 17:55:21 +00008279 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008280 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8281 if (tmp == NULL) {
8282 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008283 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008284 return;
8285 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008286 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008287 }
8288 COPY_BUF(rl,buf,len,r);
8289 r = s;
8290 rl = sl;
8291 s = cur;
8292 sl = l;
8293 count++;
8294 if (count > 50) {
8295 GROW;
8296 count = 0;
8297 }
8298 NEXTL(l);
8299 cur = CUR_CHAR(l);
8300 }
8301 buf[len] = 0;
8302 ctxt->instate = XML_PARSER_CONTENT;
8303 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008304 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008305 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008306 xmlFree(buf);
8307 return;
8308 }
8309 NEXTL(l);
8310
8311 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008312 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008313 */
8314 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8315 if (ctxt->sax->cdataBlock != NULL)
8316 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008317 else if (ctxt->sax->characters != NULL)
8318 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008319 }
8320 xmlFree(buf);
8321}
8322
8323/**
8324 * xmlParseContent:
8325 * @ctxt: an XML parser context
8326 *
8327 * Parse a content:
8328 *
8329 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8330 */
8331
8332void
8333xmlParseContent(xmlParserCtxtPtr ctxt) {
8334 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008335 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008336 ((RAW != '<') || (NXT(1) != '/'))) {
8337 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008338 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008339 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008340
8341 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008342 * First case : a Processing Instruction.
8343 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008344 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008345 xmlParsePI(ctxt);
8346 }
8347
8348 /*
8349 * Second case : a CDSection
8350 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008351 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008352 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008353 xmlParseCDSect(ctxt);
8354 }
8355
8356 /*
8357 * Third case : a comment
8358 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008359 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008360 (NXT(2) == '-') && (NXT(3) == '-')) {
8361 xmlParseComment(ctxt);
8362 ctxt->instate = XML_PARSER_CONTENT;
8363 }
8364
8365 /*
8366 * Fourth case : a sub-element.
8367 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008368 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008369 xmlParseElement(ctxt);
8370 }
8371
8372 /*
8373 * Fifth case : a reference. If if has not been resolved,
8374 * parsing returns it's Name, create the node
8375 */
8376
Daniel Veillard21a0f912001-02-25 19:54:14 +00008377 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008378 xmlParseReference(ctxt);
8379 }
8380
8381 /*
8382 * Last case, text. Note that References are handled directly.
8383 */
8384 else {
8385 xmlParseCharData(ctxt, 0);
8386 }
8387
8388 GROW;
8389 /*
8390 * Pop-up of finished entities.
8391 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008392 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008393 xmlPopInput(ctxt);
8394 SHRINK;
8395
Daniel Veillardfdc91562002-07-01 21:52:03 +00008396 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008397 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8398 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008399 ctxt->instate = XML_PARSER_EOF;
8400 break;
8401 }
8402 }
8403}
8404
8405/**
8406 * xmlParseElement:
8407 * @ctxt: an XML parser context
8408 *
8409 * parse an XML element, this is highly recursive
8410 *
8411 * [39] element ::= EmptyElemTag | STag content ETag
8412 *
8413 * [ WFC: Element Type Match ]
8414 * The Name in an element's end-tag must match the element type in the
8415 * start-tag.
8416 *
Owen Taylor3473f882001-02-23 17:55:21 +00008417 */
8418
8419void
8420xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008421 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008422 const xmlChar *prefix;
8423 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008424 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008425 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008426 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008427 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008428
8429 /* Capture start position */
8430 if (ctxt->record_info) {
8431 node_info.begin_pos = ctxt->input->consumed +
8432 (CUR_PTR - ctxt->input->base);
8433 node_info.begin_line = ctxt->input->line;
8434 }
8435
8436 if (ctxt->spaceNr == 0)
8437 spacePush(ctxt, -1);
8438 else
8439 spacePush(ctxt, *ctxt->space);
8440
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008441 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008442#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008443 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008444#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008445 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008446#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008447 else
8448 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008449#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008450 if (name == NULL) {
8451 spacePop(ctxt);
8452 return;
8453 }
8454 namePush(ctxt, name);
8455 ret = ctxt->node;
8456
Daniel Veillard4432df22003-09-28 18:58:27 +00008457#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008458 /*
8459 * [ VC: Root Element Type ]
8460 * The Name in the document type declaration must match the element
8461 * type of the root element.
8462 */
8463 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8464 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8465 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008466#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008467
8468 /*
8469 * Check for an Empty Element.
8470 */
8471 if ((RAW == '/') && (NXT(1) == '>')) {
8472 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008473 if (ctxt->sax2) {
8474 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8475 (!ctxt->disableSAX))
8476 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008477#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008478 } else {
8479 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8480 (!ctxt->disableSAX))
8481 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008482#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008483 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008484 namePop(ctxt);
8485 spacePop(ctxt);
8486 if (nsNr != ctxt->nsNr)
8487 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008488 if ( ret != NULL && ctxt->record_info ) {
8489 node_info.end_pos = ctxt->input->consumed +
8490 (CUR_PTR - ctxt->input->base);
8491 node_info.end_line = ctxt->input->line;
8492 node_info.node = ret;
8493 xmlParserAddNodeInfo(ctxt, &node_info);
8494 }
8495 return;
8496 }
8497 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008498 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008499 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008500 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8501 "Couldn't find end of Start Tag %s line %d\n",
8502 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008503
8504 /*
8505 * end of parsing of this node.
8506 */
8507 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008508 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008509 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008510 if (nsNr != ctxt->nsNr)
8511 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008512
8513 /*
8514 * Capture end position and add node
8515 */
8516 if ( ret != NULL && ctxt->record_info ) {
8517 node_info.end_pos = ctxt->input->consumed +
8518 (CUR_PTR - ctxt->input->base);
8519 node_info.end_line = ctxt->input->line;
8520 node_info.node = ret;
8521 xmlParserAddNodeInfo(ctxt, &node_info);
8522 }
8523 return;
8524 }
8525
8526 /*
8527 * Parse the content of the element:
8528 */
8529 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008530 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008531 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008532 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008533 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008534
8535 /*
8536 * end of parsing of this node.
8537 */
8538 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008539 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008540 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 if (nsNr != ctxt->nsNr)
8542 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008543 return;
8544 }
8545
8546 /*
8547 * parse the end of tag: '</' should be here.
8548 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008549 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008550 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008551 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008552 }
8553#ifdef LIBXML_SAX1_ENABLED
8554 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008555 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008556#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008557
8558 /*
8559 * Capture end position and add node
8560 */
8561 if ( ret != NULL && ctxt->record_info ) {
8562 node_info.end_pos = ctxt->input->consumed +
8563 (CUR_PTR - ctxt->input->base);
8564 node_info.end_line = ctxt->input->line;
8565 node_info.node = ret;
8566 xmlParserAddNodeInfo(ctxt, &node_info);
8567 }
8568}
8569
8570/**
8571 * xmlParseVersionNum:
8572 * @ctxt: an XML parser context
8573 *
8574 * parse the XML version value.
8575 *
8576 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8577 *
8578 * Returns the string giving the XML version number, or NULL
8579 */
8580xmlChar *
8581xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8582 xmlChar *buf = NULL;
8583 int len = 0;
8584 int size = 10;
8585 xmlChar cur;
8586
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008587 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008588 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008589 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008590 return(NULL);
8591 }
8592 cur = CUR;
8593 while (((cur >= 'a') && (cur <= 'z')) ||
8594 ((cur >= 'A') && (cur <= 'Z')) ||
8595 ((cur >= '0') && (cur <= '9')) ||
8596 (cur == '_') || (cur == '.') ||
8597 (cur == ':') || (cur == '-')) {
8598 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008599 xmlChar *tmp;
8600
Owen Taylor3473f882001-02-23 17:55:21 +00008601 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008602 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8603 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008604 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008605 return(NULL);
8606 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008607 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008608 }
8609 buf[len++] = cur;
8610 NEXT;
8611 cur=CUR;
8612 }
8613 buf[len] = 0;
8614 return(buf);
8615}
8616
8617/**
8618 * xmlParseVersionInfo:
8619 * @ctxt: an XML parser context
8620 *
8621 * parse the XML version.
8622 *
8623 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8624 *
8625 * [25] Eq ::= S? '=' S?
8626 *
8627 * Returns the version string, e.g. "1.0"
8628 */
8629
8630xmlChar *
8631xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8632 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008633
Daniel Veillarda07050d2003-10-19 14:46:32 +00008634 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008635 SKIP(7);
8636 SKIP_BLANKS;
8637 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008638 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008639 return(NULL);
8640 }
8641 NEXT;
8642 SKIP_BLANKS;
8643 if (RAW == '"') {
8644 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008645 version = xmlParseVersionNum(ctxt);
8646 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008647 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008648 } else
8649 NEXT;
8650 } else if (RAW == '\''){
8651 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008652 version = xmlParseVersionNum(ctxt);
8653 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008654 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 } else
8656 NEXT;
8657 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008658 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008659 }
8660 }
8661 return(version);
8662}
8663
8664/**
8665 * xmlParseEncName:
8666 * @ctxt: an XML parser context
8667 *
8668 * parse the XML encoding name
8669 *
8670 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8671 *
8672 * Returns the encoding name value or NULL
8673 */
8674xmlChar *
8675xmlParseEncName(xmlParserCtxtPtr ctxt) {
8676 xmlChar *buf = NULL;
8677 int len = 0;
8678 int size = 10;
8679 xmlChar cur;
8680
8681 cur = CUR;
8682 if (((cur >= 'a') && (cur <= 'z')) ||
8683 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008684 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008685 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008686 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008687 return(NULL);
8688 }
8689
8690 buf[len++] = cur;
8691 NEXT;
8692 cur = CUR;
8693 while (((cur >= 'a') && (cur <= 'z')) ||
8694 ((cur >= 'A') && (cur <= 'Z')) ||
8695 ((cur >= '0') && (cur <= '9')) ||
8696 (cur == '.') || (cur == '_') ||
8697 (cur == '-')) {
8698 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008699 xmlChar *tmp;
8700
Owen Taylor3473f882001-02-23 17:55:21 +00008701 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008702 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8703 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008704 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008705 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008706 return(NULL);
8707 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008708 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008709 }
8710 buf[len++] = cur;
8711 NEXT;
8712 cur = CUR;
8713 if (cur == 0) {
8714 SHRINK;
8715 GROW;
8716 cur = CUR;
8717 }
8718 }
8719 buf[len] = 0;
8720 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008721 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008722 }
8723 return(buf);
8724}
8725
8726/**
8727 * xmlParseEncodingDecl:
8728 * @ctxt: an XML parser context
8729 *
8730 * parse the XML encoding declaration
8731 *
8732 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8733 *
8734 * This sets up the conversion filters.
8735 *
8736 * Returns the encoding value or NULL
8737 */
8738
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008739const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008740xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8741 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008742
8743 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008744 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008745 SKIP(8);
8746 SKIP_BLANKS;
8747 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008748 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008749 return(NULL);
8750 }
8751 NEXT;
8752 SKIP_BLANKS;
8753 if (RAW == '"') {
8754 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008755 encoding = xmlParseEncName(ctxt);
8756 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008757 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008758 } else
8759 NEXT;
8760 } else if (RAW == '\''){
8761 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008762 encoding = xmlParseEncName(ctxt);
8763 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008764 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008765 } else
8766 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008767 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008768 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008769 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008770 /*
8771 * UTF-16 encoding stwich has already taken place at this stage,
8772 * more over the little-endian/big-endian selection is already done
8773 */
8774 if ((encoding != NULL) &&
8775 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8776 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008777 if (ctxt->encoding != NULL)
8778 xmlFree((xmlChar *) ctxt->encoding);
8779 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008780 }
8781 /*
8782 * UTF-8 encoding is handled natively
8783 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008784 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008785 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8786 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008787 if (ctxt->encoding != NULL)
8788 xmlFree((xmlChar *) ctxt->encoding);
8789 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008790 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008791 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008792 xmlCharEncodingHandlerPtr handler;
8793
8794 if (ctxt->input->encoding != NULL)
8795 xmlFree((xmlChar *) ctxt->input->encoding);
8796 ctxt->input->encoding = encoding;
8797
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008798 handler = xmlFindCharEncodingHandler((const char *) encoding);
8799 if (handler != NULL) {
8800 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008801 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008802 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008803 "Unsupported encoding %s\n", encoding);
8804 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008805 }
8806 }
8807 }
8808 return(encoding);
8809}
8810
8811/**
8812 * xmlParseSDDecl:
8813 * @ctxt: an XML parser context
8814 *
8815 * parse the XML standalone declaration
8816 *
8817 * [32] SDDecl ::= S 'standalone' Eq
8818 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8819 *
8820 * [ VC: Standalone Document Declaration ]
8821 * TODO The standalone document declaration must have the value "no"
8822 * if any external markup declarations contain declarations of:
8823 * - attributes with default values, if elements to which these
8824 * attributes apply appear in the document without specifications
8825 * of values for these attributes, or
8826 * - entities (other than amp, lt, gt, apos, quot), if references
8827 * to those entities appear in the document, or
8828 * - attributes with values subject to normalization, where the
8829 * attribute appears in the document with a value which will change
8830 * as a result of normalization, or
8831 * - element types with element content, if white space occurs directly
8832 * within any instance of those types.
8833 *
8834 * Returns 1 if standalone, 0 otherwise
8835 */
8836
8837int
8838xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8839 int standalone = -1;
8840
8841 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008842 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008843 SKIP(10);
8844 SKIP_BLANKS;
8845 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008846 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008847 return(standalone);
8848 }
8849 NEXT;
8850 SKIP_BLANKS;
8851 if (RAW == '\''){
8852 NEXT;
8853 if ((RAW == 'n') && (NXT(1) == 'o')) {
8854 standalone = 0;
8855 SKIP(2);
8856 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8857 (NXT(2) == 's')) {
8858 standalone = 1;
8859 SKIP(3);
8860 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008861 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008862 }
8863 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008864 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008865 } else
8866 NEXT;
8867 } else if (RAW == '"'){
8868 NEXT;
8869 if ((RAW == 'n') && (NXT(1) == 'o')) {
8870 standalone = 0;
8871 SKIP(2);
8872 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8873 (NXT(2) == 's')) {
8874 standalone = 1;
8875 SKIP(3);
8876 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008877 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008878 }
8879 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008880 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008881 } else
8882 NEXT;
8883 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008884 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008885 }
8886 }
8887 return(standalone);
8888}
8889
8890/**
8891 * xmlParseXMLDecl:
8892 * @ctxt: an XML parser context
8893 *
8894 * parse an XML declaration header
8895 *
8896 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8897 */
8898
8899void
8900xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8901 xmlChar *version;
8902
8903 /*
8904 * We know that '<?xml' is here.
8905 */
8906 SKIP(5);
8907
William M. Brack76e95df2003-10-18 16:20:14 +00008908 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8910 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008911 }
8912 SKIP_BLANKS;
8913
8914 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008915 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008916 */
8917 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008918 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008919 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008920 } else {
8921 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8922 /*
8923 * TODO: Blueberry should be detected here
8924 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008925 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8926 "Unsupported version '%s'\n",
8927 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008928 }
8929 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008930 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008931 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008932 }
Owen Taylor3473f882001-02-23 17:55:21 +00008933
8934 /*
8935 * We may have the encoding declaration
8936 */
William M. Brack76e95df2003-10-18 16:20:14 +00008937 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008938 if ((RAW == '?') && (NXT(1) == '>')) {
8939 SKIP(2);
8940 return;
8941 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008942 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008943 }
8944 xmlParseEncodingDecl(ctxt);
8945 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8946 /*
8947 * The XML REC instructs us to stop parsing right here
8948 */
8949 return;
8950 }
8951
8952 /*
8953 * We may have the standalone status.
8954 */
William M. Brack76e95df2003-10-18 16:20:14 +00008955 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008956 if ((RAW == '?') && (NXT(1) == '>')) {
8957 SKIP(2);
8958 return;
8959 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008960 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008961 }
8962 SKIP_BLANKS;
8963 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8964
8965 SKIP_BLANKS;
8966 if ((RAW == '?') && (NXT(1) == '>')) {
8967 SKIP(2);
8968 } else if (RAW == '>') {
8969 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008970 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008971 NEXT;
8972 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008973 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008974 MOVETO_ENDTAG(CUR_PTR);
8975 NEXT;
8976 }
8977}
8978
8979/**
8980 * xmlParseMisc:
8981 * @ctxt: an XML parser context
8982 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008983 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008984 *
8985 * [27] Misc ::= Comment | PI | S
8986 */
8987
8988void
8989xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008990 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008991 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008992 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008993 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008994 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008995 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008996 NEXT;
8997 } else
8998 xmlParseComment(ctxt);
8999 }
9000}
9001
9002/**
9003 * xmlParseDocument:
9004 * @ctxt: an XML parser context
9005 *
9006 * parse an XML document (and build a tree if using the standard SAX
9007 * interface).
9008 *
9009 * [1] document ::= prolog element Misc*
9010 *
9011 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9012 *
9013 * Returns 0, -1 in case of error. the parser context is augmented
9014 * as a result of the parsing.
9015 */
9016
9017int
9018xmlParseDocument(xmlParserCtxtPtr ctxt) {
9019 xmlChar start[4];
9020 xmlCharEncoding enc;
9021
9022 xmlInitParser();
9023
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009024 if ((ctxt == NULL) || (ctxt->input == NULL))
9025 return(-1);
9026
Owen Taylor3473f882001-02-23 17:55:21 +00009027 GROW;
9028
9029 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009030 * SAX: detecting the level.
9031 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009032 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009033
9034 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009035 * SAX: beginning of the document processing.
9036 */
9037 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9038 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9039
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009040 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9041 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009042 /*
9043 * Get the 4 first bytes and decode the charset
9044 * if enc != XML_CHAR_ENCODING_NONE
9045 * plug some encoding conversion routines.
9046 */
9047 start[0] = RAW;
9048 start[1] = NXT(1);
9049 start[2] = NXT(2);
9050 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009051 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009052 if (enc != XML_CHAR_ENCODING_NONE) {
9053 xmlSwitchEncoding(ctxt, enc);
9054 }
Owen Taylor3473f882001-02-23 17:55:21 +00009055 }
9056
9057
9058 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009059 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009060 }
9061
9062 /*
9063 * Check for the XMLDecl in the Prolog.
9064 */
9065 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009066 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009067
9068 /*
9069 * Note that we will switch encoding on the fly.
9070 */
9071 xmlParseXMLDecl(ctxt);
9072 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9073 /*
9074 * The XML REC instructs us to stop parsing right here
9075 */
9076 return(-1);
9077 }
9078 ctxt->standalone = ctxt->input->standalone;
9079 SKIP_BLANKS;
9080 } else {
9081 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9082 }
9083 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9084 ctxt->sax->startDocument(ctxt->userData);
9085
9086 /*
9087 * The Misc part of the Prolog
9088 */
9089 GROW;
9090 xmlParseMisc(ctxt);
9091
9092 /*
9093 * Then possibly doc type declaration(s) and more Misc
9094 * (doctypedecl Misc*)?
9095 */
9096 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009097 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009098
9099 ctxt->inSubset = 1;
9100 xmlParseDocTypeDecl(ctxt);
9101 if (RAW == '[') {
9102 ctxt->instate = XML_PARSER_DTD;
9103 xmlParseInternalSubset(ctxt);
9104 }
9105
9106 /*
9107 * Create and update the external subset.
9108 */
9109 ctxt->inSubset = 2;
9110 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9111 (!ctxt->disableSAX))
9112 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9113 ctxt->extSubSystem, ctxt->extSubURI);
9114 ctxt->inSubset = 0;
9115
9116
9117 ctxt->instate = XML_PARSER_PROLOG;
9118 xmlParseMisc(ctxt);
9119 }
9120
9121 /*
9122 * Time to start parsing the tree itself
9123 */
9124 GROW;
9125 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009126 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9127 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009128 } else {
9129 ctxt->instate = XML_PARSER_CONTENT;
9130 xmlParseElement(ctxt);
9131 ctxt->instate = XML_PARSER_EPILOG;
9132
9133
9134 /*
9135 * The Misc part at the end
9136 */
9137 xmlParseMisc(ctxt);
9138
Daniel Veillard561b7f82002-03-20 21:55:57 +00009139 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009140 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009141 }
9142 ctxt->instate = XML_PARSER_EOF;
9143 }
9144
9145 /*
9146 * SAX: end of the document processing.
9147 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009148 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009149 ctxt->sax->endDocument(ctxt->userData);
9150
Daniel Veillard5997aca2002-03-18 18:36:20 +00009151 /*
9152 * Remove locally kept entity definitions if the tree was not built
9153 */
9154 if ((ctxt->myDoc != NULL) &&
9155 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
9156 xmlFreeDoc(ctxt->myDoc);
9157 ctxt->myDoc = NULL;
9158 }
9159
Daniel Veillardc7612992002-02-17 22:47:37 +00009160 if (! ctxt->wellFormed) {
9161 ctxt->valid = 0;
9162 return(-1);
9163 }
Owen Taylor3473f882001-02-23 17:55:21 +00009164 return(0);
9165}
9166
9167/**
9168 * xmlParseExtParsedEnt:
9169 * @ctxt: an XML parser context
9170 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009171 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00009172 * An external general parsed entity is well-formed if it matches the
9173 * production labeled extParsedEnt.
9174 *
9175 * [78] extParsedEnt ::= TextDecl? content
9176 *
9177 * Returns 0, -1 in case of error. the parser context is augmented
9178 * as a result of the parsing.
9179 */
9180
9181int
9182xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
9183 xmlChar start[4];
9184 xmlCharEncoding enc;
9185
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009186 if ((ctxt == NULL) || (ctxt->input == NULL))
9187 return(-1);
9188
Owen Taylor3473f882001-02-23 17:55:21 +00009189 xmlDefaultSAXHandlerInit();
9190
Daniel Veillard309f81d2003-09-23 09:02:53 +00009191 xmlDetectSAX2(ctxt);
9192
Owen Taylor3473f882001-02-23 17:55:21 +00009193 GROW;
9194
9195 /*
9196 * SAX: beginning of the document processing.
9197 */
9198 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9199 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9200
9201 /*
9202 * Get the 4 first bytes and decode the charset
9203 * if enc != XML_CHAR_ENCODING_NONE
9204 * plug some encoding conversion routines.
9205 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009206 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
9207 start[0] = RAW;
9208 start[1] = NXT(1);
9209 start[2] = NXT(2);
9210 start[3] = NXT(3);
9211 enc = xmlDetectCharEncoding(start, 4);
9212 if (enc != XML_CHAR_ENCODING_NONE) {
9213 xmlSwitchEncoding(ctxt, enc);
9214 }
Owen Taylor3473f882001-02-23 17:55:21 +00009215 }
9216
9217
9218 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009219 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009220 }
9221
9222 /*
9223 * Check for the XMLDecl in the Prolog.
9224 */
9225 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009226 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009227
9228 /*
9229 * Note that we will switch encoding on the fly.
9230 */
9231 xmlParseXMLDecl(ctxt);
9232 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9233 /*
9234 * The XML REC instructs us to stop parsing right here
9235 */
9236 return(-1);
9237 }
9238 SKIP_BLANKS;
9239 } else {
9240 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9241 }
9242 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9243 ctxt->sax->startDocument(ctxt->userData);
9244
9245 /*
9246 * Doing validity checking on chunk doesn't make sense
9247 */
9248 ctxt->instate = XML_PARSER_CONTENT;
9249 ctxt->validate = 0;
9250 ctxt->loadsubset = 0;
9251 ctxt->depth = 0;
9252
9253 xmlParseContent(ctxt);
9254
9255 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009256 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009257 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009258 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009259 }
9260
9261 /*
9262 * SAX: end of the document processing.
9263 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009265 ctxt->sax->endDocument(ctxt->userData);
9266
9267 if (! ctxt->wellFormed) return(-1);
9268 return(0);
9269}
9270
Daniel Veillard73b013f2003-09-30 12:36:01 +00009271#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009272/************************************************************************
9273 * *
9274 * Progressive parsing interfaces *
9275 * *
9276 ************************************************************************/
9277
9278/**
9279 * xmlParseLookupSequence:
9280 * @ctxt: an XML parser context
9281 * @first: the first char to lookup
9282 * @next: the next char to lookup or zero
9283 * @third: the next char to lookup or zero
9284 *
9285 * Try to find if a sequence (first, next, third) or just (first next) or
9286 * (first) is available in the input stream.
9287 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9288 * to avoid rescanning sequences of bytes, it DOES change the state of the
9289 * parser, do not use liberally.
9290 *
9291 * Returns the index to the current parsing point if the full sequence
9292 * is available, -1 otherwise.
9293 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009294static int
Owen Taylor3473f882001-02-23 17:55:21 +00009295xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9296 xmlChar next, xmlChar third) {
9297 int base, len;
9298 xmlParserInputPtr in;
9299 const xmlChar *buf;
9300
9301 in = ctxt->input;
9302 if (in == NULL) return(-1);
9303 base = in->cur - in->base;
9304 if (base < 0) return(-1);
9305 if (ctxt->checkIndex > base)
9306 base = ctxt->checkIndex;
9307 if (in->buf == NULL) {
9308 buf = in->base;
9309 len = in->length;
9310 } else {
9311 buf = in->buf->buffer->content;
9312 len = in->buf->buffer->use;
9313 }
9314 /* take into account the sequence length */
9315 if (third) len -= 2;
9316 else if (next) len --;
9317 for (;base < len;base++) {
9318 if (buf[base] == first) {
9319 if (third != 0) {
9320 if ((buf[base + 1] != next) ||
9321 (buf[base + 2] != third)) continue;
9322 } else if (next != 0) {
9323 if (buf[base + 1] != next) continue;
9324 }
9325 ctxt->checkIndex = 0;
9326#ifdef DEBUG_PUSH
9327 if (next == 0)
9328 xmlGenericError(xmlGenericErrorContext,
9329 "PP: lookup '%c' found at %d\n",
9330 first, base);
9331 else if (third == 0)
9332 xmlGenericError(xmlGenericErrorContext,
9333 "PP: lookup '%c%c' found at %d\n",
9334 first, next, base);
9335 else
9336 xmlGenericError(xmlGenericErrorContext,
9337 "PP: lookup '%c%c%c' found at %d\n",
9338 first, next, third, base);
9339#endif
9340 return(base - (in->cur - in->base));
9341 }
9342 }
9343 ctxt->checkIndex = base;
9344#ifdef DEBUG_PUSH
9345 if (next == 0)
9346 xmlGenericError(xmlGenericErrorContext,
9347 "PP: lookup '%c' failed\n", first);
9348 else if (third == 0)
9349 xmlGenericError(xmlGenericErrorContext,
9350 "PP: lookup '%c%c' failed\n", first, next);
9351 else
9352 xmlGenericError(xmlGenericErrorContext,
9353 "PP: lookup '%c%c%c' failed\n", first, next, third);
9354#endif
9355 return(-1);
9356}
9357
9358/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009359 * xmlParseGetLasts:
9360 * @ctxt: an XML parser context
9361 * @lastlt: pointer to store the last '<' from the input
9362 * @lastgt: pointer to store the last '>' from the input
9363 *
9364 * Lookup the last < and > in the current chunk
9365 */
9366static void
9367xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9368 const xmlChar **lastgt) {
9369 const xmlChar *tmp;
9370
9371 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9372 xmlGenericError(xmlGenericErrorContext,
9373 "Internal error: xmlParseGetLasts\n");
9374 return;
9375 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009376 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009377 tmp = ctxt->input->end;
9378 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009379 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009380 if (tmp < ctxt->input->base) {
9381 *lastlt = NULL;
9382 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009383 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009384 *lastlt = tmp;
9385 tmp++;
9386 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9387 if (*tmp == '\'') {
9388 tmp++;
9389 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9390 if (tmp < ctxt->input->end) tmp++;
9391 } else if (*tmp == '"') {
9392 tmp++;
9393 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9394 if (tmp < ctxt->input->end) tmp++;
9395 } else
9396 tmp++;
9397 }
9398 if (tmp < ctxt->input->end)
9399 *lastgt = tmp;
9400 else {
9401 tmp = *lastlt;
9402 tmp--;
9403 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9404 if (tmp >= ctxt->input->base)
9405 *lastgt = tmp;
9406 else
9407 *lastgt = NULL;
9408 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009409 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009410 } else {
9411 *lastlt = NULL;
9412 *lastgt = NULL;
9413 }
9414}
9415/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009416 * xmlCheckCdataPush:
9417 * @cur: pointer to the bock of characters
9418 * @len: length of the block in bytes
9419 *
9420 * Check that the block of characters is okay as SCdata content [20]
9421 *
9422 * Returns the number of bytes to pass if okay, a negative index where an
9423 * UTF-8 error occured otherwise
9424 */
9425static int
9426xmlCheckCdataPush(const xmlChar *utf, int len) {
9427 int ix;
9428 unsigned char c;
9429 int codepoint;
9430
9431 if ((utf == NULL) || (len <= 0))
9432 return(0);
9433
9434 for (ix = 0; ix < len;) { /* string is 0-terminated */
9435 c = utf[ix];
9436 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
9437 if (c >= 0x20)
9438 ix++;
9439 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
9440 ix++;
9441 else
9442 return(-ix);
9443 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
9444 if (ix + 2 > len) return(ix);
9445 if ((utf[ix+1] & 0xc0 ) != 0x80)
9446 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009447 codepoint = (utf[ix] & 0x1f) << 6;
9448 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009449 if (!xmlIsCharQ(codepoint))
9450 return(-ix);
9451 ix += 2;
9452 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
9453 if (ix + 3 > len) return(ix);
9454 if (((utf[ix+1] & 0xc0) != 0x80) ||
9455 ((utf[ix+2] & 0xc0) != 0x80))
9456 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009457 codepoint = (utf[ix] & 0xf) << 12;
9458 codepoint |= (utf[ix+1] & 0x3f) << 6;
9459 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009460 if (!xmlIsCharQ(codepoint))
9461 return(-ix);
9462 ix += 3;
9463 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
9464 if (ix + 4 > len) return(ix);
9465 if (((utf[ix+1] & 0xc0) != 0x80) ||
9466 ((utf[ix+2] & 0xc0) != 0x80) ||
9467 ((utf[ix+3] & 0xc0) != 0x80))
9468 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +00009469 codepoint = (utf[ix] & 0x7) << 18;
9470 codepoint |= (utf[ix+1] & 0x3f) << 12;
9471 codepoint |= (utf[ix+2] & 0x3f) << 6;
9472 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009473 if (!xmlIsCharQ(codepoint))
9474 return(-ix);
9475 ix += 4;
9476 } else /* unknown encoding */
9477 return(-ix);
9478 }
9479 return(ix);
9480}
9481
9482/**
Owen Taylor3473f882001-02-23 17:55:21 +00009483 * xmlParseTryOrFinish:
9484 * @ctxt: an XML parser context
9485 * @terminate: last chunk indicator
9486 *
9487 * Try to progress on parsing
9488 *
9489 * Returns zero if no parsing was possible
9490 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009491static int
Owen Taylor3473f882001-02-23 17:55:21 +00009492xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9493 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009494 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009495 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009496 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009497
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009498 if (ctxt->input == NULL)
9499 return(0);
9500
Owen Taylor3473f882001-02-23 17:55:21 +00009501#ifdef DEBUG_PUSH
9502 switch (ctxt->instate) {
9503 case XML_PARSER_EOF:
9504 xmlGenericError(xmlGenericErrorContext,
9505 "PP: try EOF\n"); break;
9506 case XML_PARSER_START:
9507 xmlGenericError(xmlGenericErrorContext,
9508 "PP: try START\n"); break;
9509 case XML_PARSER_MISC:
9510 xmlGenericError(xmlGenericErrorContext,
9511 "PP: try MISC\n");break;
9512 case XML_PARSER_COMMENT:
9513 xmlGenericError(xmlGenericErrorContext,
9514 "PP: try COMMENT\n");break;
9515 case XML_PARSER_PROLOG:
9516 xmlGenericError(xmlGenericErrorContext,
9517 "PP: try PROLOG\n");break;
9518 case XML_PARSER_START_TAG:
9519 xmlGenericError(xmlGenericErrorContext,
9520 "PP: try START_TAG\n");break;
9521 case XML_PARSER_CONTENT:
9522 xmlGenericError(xmlGenericErrorContext,
9523 "PP: try CONTENT\n");break;
9524 case XML_PARSER_CDATA_SECTION:
9525 xmlGenericError(xmlGenericErrorContext,
9526 "PP: try CDATA_SECTION\n");break;
9527 case XML_PARSER_END_TAG:
9528 xmlGenericError(xmlGenericErrorContext,
9529 "PP: try END_TAG\n");break;
9530 case XML_PARSER_ENTITY_DECL:
9531 xmlGenericError(xmlGenericErrorContext,
9532 "PP: try ENTITY_DECL\n");break;
9533 case XML_PARSER_ENTITY_VALUE:
9534 xmlGenericError(xmlGenericErrorContext,
9535 "PP: try ENTITY_VALUE\n");break;
9536 case XML_PARSER_ATTRIBUTE_VALUE:
9537 xmlGenericError(xmlGenericErrorContext,
9538 "PP: try ATTRIBUTE_VALUE\n");break;
9539 case XML_PARSER_DTD:
9540 xmlGenericError(xmlGenericErrorContext,
9541 "PP: try DTD\n");break;
9542 case XML_PARSER_EPILOG:
9543 xmlGenericError(xmlGenericErrorContext,
9544 "PP: try EPILOG\n");break;
9545 case XML_PARSER_PI:
9546 xmlGenericError(xmlGenericErrorContext,
9547 "PP: try PI\n");break;
9548 case XML_PARSER_IGNORE:
9549 xmlGenericError(xmlGenericErrorContext,
9550 "PP: try IGNORE\n");break;
9551 }
9552#endif
9553
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009554 if ((ctxt->input != NULL) &&
9555 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009556 xmlSHRINK(ctxt);
9557 ctxt->checkIndex = 0;
9558 }
9559 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009560
Daniel Veillarda880b122003-04-21 21:36:41 +00009561 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009562 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009563 return(0);
9564
9565
Owen Taylor3473f882001-02-23 17:55:21 +00009566 /*
9567 * Pop-up of finished entities.
9568 */
9569 while ((RAW == 0) && (ctxt->inputNr > 1))
9570 xmlPopInput(ctxt);
9571
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009572 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009573 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009574 avail = ctxt->input->length -
9575 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009576 else {
9577 /*
9578 * If we are operating on converted input, try to flush
9579 * remainng chars to avoid them stalling in the non-converted
9580 * buffer.
9581 */
9582 if ((ctxt->input->buf->raw != NULL) &&
9583 (ctxt->input->buf->raw->use > 0)) {
9584 int base = ctxt->input->base -
9585 ctxt->input->buf->buffer->content;
9586 int current = ctxt->input->cur - ctxt->input->base;
9587
9588 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9589 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9590 ctxt->input->cur = ctxt->input->base + current;
9591 ctxt->input->end =
9592 &ctxt->input->buf->buffer->content[
9593 ctxt->input->buf->buffer->use];
9594 }
9595 avail = ctxt->input->buf->buffer->use -
9596 (ctxt->input->cur - ctxt->input->base);
9597 }
Owen Taylor3473f882001-02-23 17:55:21 +00009598 if (avail < 1)
9599 goto done;
9600 switch (ctxt->instate) {
9601 case XML_PARSER_EOF:
9602 /*
9603 * Document parsing is done !
9604 */
9605 goto done;
9606 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009607 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9608 xmlChar start[4];
9609 xmlCharEncoding enc;
9610
9611 /*
9612 * Very first chars read from the document flow.
9613 */
9614 if (avail < 4)
9615 goto done;
9616
9617 /*
9618 * Get the 4 first bytes and decode the charset
9619 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009620 * plug some encoding conversion routines,
9621 * else xmlSwitchEncoding will set to (default)
9622 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009623 */
9624 start[0] = RAW;
9625 start[1] = NXT(1);
9626 start[2] = NXT(2);
9627 start[3] = NXT(3);
9628 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009629 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009630 break;
9631 }
Owen Taylor3473f882001-02-23 17:55:21 +00009632
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009633 if (avail < 2)
9634 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009635 cur = ctxt->input->cur[0];
9636 next = ctxt->input->cur[1];
9637 if (cur == 0) {
9638 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9639 ctxt->sax->setDocumentLocator(ctxt->userData,
9640 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009641 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009642 ctxt->instate = XML_PARSER_EOF;
9643#ifdef DEBUG_PUSH
9644 xmlGenericError(xmlGenericErrorContext,
9645 "PP: entering EOF\n");
9646#endif
9647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9648 ctxt->sax->endDocument(ctxt->userData);
9649 goto done;
9650 }
9651 if ((cur == '<') && (next == '?')) {
9652 /* PI or XML decl */
9653 if (avail < 5) return(ret);
9654 if ((!terminate) &&
9655 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9656 return(ret);
9657 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9658 ctxt->sax->setDocumentLocator(ctxt->userData,
9659 &xmlDefaultSAXLocator);
9660 if ((ctxt->input->cur[2] == 'x') &&
9661 (ctxt->input->cur[3] == 'm') &&
9662 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009663 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009664 ret += 5;
9665#ifdef DEBUG_PUSH
9666 xmlGenericError(xmlGenericErrorContext,
9667 "PP: Parsing XML Decl\n");
9668#endif
9669 xmlParseXMLDecl(ctxt);
9670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9671 /*
9672 * The XML REC instructs us to stop parsing right
9673 * here
9674 */
9675 ctxt->instate = XML_PARSER_EOF;
9676 return(0);
9677 }
9678 ctxt->standalone = ctxt->input->standalone;
9679 if ((ctxt->encoding == NULL) &&
9680 (ctxt->input->encoding != NULL))
9681 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9682 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9683 (!ctxt->disableSAX))
9684 ctxt->sax->startDocument(ctxt->userData);
9685 ctxt->instate = XML_PARSER_MISC;
9686#ifdef DEBUG_PUSH
9687 xmlGenericError(xmlGenericErrorContext,
9688 "PP: entering MISC\n");
9689#endif
9690 } else {
9691 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9692 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9693 (!ctxt->disableSAX))
9694 ctxt->sax->startDocument(ctxt->userData);
9695 ctxt->instate = XML_PARSER_MISC;
9696#ifdef DEBUG_PUSH
9697 xmlGenericError(xmlGenericErrorContext,
9698 "PP: entering MISC\n");
9699#endif
9700 }
9701 } else {
9702 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9703 ctxt->sax->setDocumentLocator(ctxt->userData,
9704 &xmlDefaultSAXLocator);
9705 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009706 if (ctxt->version == NULL) {
9707 xmlErrMemory(ctxt, NULL);
9708 break;
9709 }
Owen Taylor3473f882001-02-23 17:55:21 +00009710 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9711 (!ctxt->disableSAX))
9712 ctxt->sax->startDocument(ctxt->userData);
9713 ctxt->instate = XML_PARSER_MISC;
9714#ifdef DEBUG_PUSH
9715 xmlGenericError(xmlGenericErrorContext,
9716 "PP: entering MISC\n");
9717#endif
9718 }
9719 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009720 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009721 const xmlChar *name;
9722 const xmlChar *prefix;
9723 const xmlChar *URI;
9724 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009725
9726 if ((avail < 2) && (ctxt->inputNr == 1))
9727 goto done;
9728 cur = ctxt->input->cur[0];
9729 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009730 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009731 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009732 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9733 ctxt->sax->endDocument(ctxt->userData);
9734 goto done;
9735 }
9736 if (!terminate) {
9737 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009738 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009739 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009740 goto done;
9741 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9742 goto done;
9743 }
9744 }
9745 if (ctxt->spaceNr == 0)
9746 spacePush(ctxt, -1);
9747 else
9748 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009749#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009750 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009751#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009752 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009753#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009754 else
9755 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009756#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009757 if (name == NULL) {
9758 spacePop(ctxt);
9759 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009760 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9761 ctxt->sax->endDocument(ctxt->userData);
9762 goto done;
9763 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009764#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009765 /*
9766 * [ VC: Root Element Type ]
9767 * The Name in the document type declaration must match
9768 * the element type of the root element.
9769 */
9770 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9771 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9772 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009773#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009774
9775 /*
9776 * Check for an Empty Element.
9777 */
9778 if ((RAW == '/') && (NXT(1) == '>')) {
9779 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009780
9781 if (ctxt->sax2) {
9782 if ((ctxt->sax != NULL) &&
9783 (ctxt->sax->endElementNs != NULL) &&
9784 (!ctxt->disableSAX))
9785 ctxt->sax->endElementNs(ctxt->userData, name,
9786 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009787 if (ctxt->nsNr - nsNr > 0)
9788 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009789#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009790 } else {
9791 if ((ctxt->sax != NULL) &&
9792 (ctxt->sax->endElement != NULL) &&
9793 (!ctxt->disableSAX))
9794 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009795#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009796 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009797 spacePop(ctxt);
9798 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009799 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009800 } else {
9801 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009802 }
9803 break;
9804 }
9805 if (RAW == '>') {
9806 NEXT;
9807 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009808 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009809 "Couldn't find end of Start Tag %s\n",
9810 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009811 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009812 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009813 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009814 if (ctxt->sax2)
9815 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009816#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009817 else
9818 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009819#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009820
Daniel Veillarda880b122003-04-21 21:36:41 +00009821 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009822 break;
9823 }
9824 case XML_PARSER_CONTENT: {
9825 const xmlChar *test;
9826 unsigned int cons;
9827 if ((avail < 2) && (ctxt->inputNr == 1))
9828 goto done;
9829 cur = ctxt->input->cur[0];
9830 next = ctxt->input->cur[1];
9831
9832 test = CUR_PTR;
9833 cons = ctxt->input->consumed;
9834 if ((cur == '<') && (next == '/')) {
9835 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009836 break;
9837 } else if ((cur == '<') && (next == '?')) {
9838 if ((!terminate) &&
9839 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9840 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009841 xmlParsePI(ctxt);
9842 } else if ((cur == '<') && (next != '!')) {
9843 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009844 break;
9845 } else if ((cur == '<') && (next == '!') &&
9846 (ctxt->input->cur[2] == '-') &&
9847 (ctxt->input->cur[3] == '-')) {
9848 if ((!terminate) &&
9849 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9850 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009851 xmlParseComment(ctxt);
9852 ctxt->instate = XML_PARSER_CONTENT;
9853 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9854 (ctxt->input->cur[2] == '[') &&
9855 (ctxt->input->cur[3] == 'C') &&
9856 (ctxt->input->cur[4] == 'D') &&
9857 (ctxt->input->cur[5] == 'A') &&
9858 (ctxt->input->cur[6] == 'T') &&
9859 (ctxt->input->cur[7] == 'A') &&
9860 (ctxt->input->cur[8] == '[')) {
9861 SKIP(9);
9862 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009863 break;
9864 } else if ((cur == '<') && (next == '!') &&
9865 (avail < 9)) {
9866 goto done;
9867 } else if (cur == '&') {
9868 if ((!terminate) &&
9869 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9870 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009871 xmlParseReference(ctxt);
9872 } else {
9873 /* TODO Avoid the extra copy, handle directly !!! */
9874 /*
9875 * Goal of the following test is:
9876 * - minimize calls to the SAX 'character' callback
9877 * when they are mergeable
9878 * - handle a problem for isBlank when we only parse
9879 * a sequence of blank chars and the next one is
9880 * not available to check against '<' presence.
9881 * - tries to homogenize the differences in SAX
9882 * callbacks between the push and pull versions
9883 * of the parser.
9884 */
9885 if ((ctxt->inputNr == 1) &&
9886 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9887 if (!terminate) {
9888 if (ctxt->progressive) {
9889 if ((lastlt == NULL) ||
9890 (ctxt->input->cur > lastlt))
9891 goto done;
9892 } else if (xmlParseLookupSequence(ctxt,
9893 '<', 0, 0) < 0) {
9894 goto done;
9895 }
9896 }
9897 }
9898 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009899 xmlParseCharData(ctxt, 0);
9900 }
9901 /*
9902 * Pop-up of finished entities.
9903 */
9904 while ((RAW == 0) && (ctxt->inputNr > 1))
9905 xmlPopInput(ctxt);
9906 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009907 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9908 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009909 ctxt->instate = XML_PARSER_EOF;
9910 break;
9911 }
9912 break;
9913 }
9914 case XML_PARSER_END_TAG:
9915 if (avail < 2)
9916 goto done;
9917 if (!terminate) {
9918 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009919 /* > can be found unescaped in attribute values */
9920 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009921 goto done;
9922 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9923 goto done;
9924 }
9925 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009926 if (ctxt->sax2) {
9927 xmlParseEndTag2(ctxt,
9928 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9929 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009930 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009931 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009932 }
9933#ifdef LIBXML_SAX1_ENABLED
9934 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009935 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009936#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009937 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009938 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009939 } else {
9940 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009941 }
9942 break;
9943 case XML_PARSER_CDATA_SECTION: {
9944 /*
9945 * The Push mode need to have the SAX callback for
9946 * cdataBlock merge back contiguous callbacks.
9947 */
9948 int base;
9949
9950 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9951 if (base < 0) {
9952 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009953 int tmp;
9954
9955 tmp = xmlCheckCdataPush(ctxt->input->cur,
9956 XML_PARSER_BIG_BUFFER_SIZE);
9957 if (tmp < 0) {
9958 tmp = -tmp;
9959 ctxt->input->cur += tmp;
9960 goto encoding_error;
9961 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9963 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009964 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009965 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009966 else if (ctxt->sax->characters != NULL)
9967 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009968 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009969 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009970 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +00009971 ctxt->checkIndex = 0;
9972 }
9973 goto done;
9974 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +00009975 int tmp;
9976
9977 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
9978 if ((tmp < 0) || (tmp != base)) {
9979 tmp = -tmp;
9980 ctxt->input->cur += tmp;
9981 goto encoding_error;
9982 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009983 if ((ctxt->sax != NULL) && (base > 0) &&
9984 (!ctxt->disableSAX)) {
9985 if (ctxt->sax->cdataBlock != NULL)
9986 ctxt->sax->cdataBlock(ctxt->userData,
9987 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009988 else if (ctxt->sax->characters != NULL)
9989 ctxt->sax->characters(ctxt->userData,
9990 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009991 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009992 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009993 ctxt->checkIndex = 0;
9994 ctxt->instate = XML_PARSER_CONTENT;
9995#ifdef DEBUG_PUSH
9996 xmlGenericError(xmlGenericErrorContext,
9997 "PP: entering CONTENT\n");
9998#endif
9999 }
10000 break;
10001 }
Owen Taylor3473f882001-02-23 17:55:21 +000010002 case XML_PARSER_MISC:
10003 SKIP_BLANKS;
10004 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010005 avail = ctxt->input->length -
10006 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010007 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010008 avail = ctxt->input->buf->buffer->use -
10009 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010010 if (avail < 2)
10011 goto done;
10012 cur = ctxt->input->cur[0];
10013 next = ctxt->input->cur[1];
10014 if ((cur == '<') && (next == '?')) {
10015 if ((!terminate) &&
10016 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10017 goto done;
10018#ifdef DEBUG_PUSH
10019 xmlGenericError(xmlGenericErrorContext,
10020 "PP: Parsing PI\n");
10021#endif
10022 xmlParsePI(ctxt);
10023 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010024 (ctxt->input->cur[2] == '-') &&
10025 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010026 if ((!terminate) &&
10027 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10028 goto done;
10029#ifdef DEBUG_PUSH
10030 xmlGenericError(xmlGenericErrorContext,
10031 "PP: Parsing Comment\n");
10032#endif
10033 xmlParseComment(ctxt);
10034 ctxt->instate = XML_PARSER_MISC;
10035 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010036 (ctxt->input->cur[2] == 'D') &&
10037 (ctxt->input->cur[3] == 'O') &&
10038 (ctxt->input->cur[4] == 'C') &&
10039 (ctxt->input->cur[5] == 'T') &&
10040 (ctxt->input->cur[6] == 'Y') &&
10041 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010042 (ctxt->input->cur[8] == 'E')) {
10043 if ((!terminate) &&
10044 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10045 goto done;
10046#ifdef DEBUG_PUSH
10047 xmlGenericError(xmlGenericErrorContext,
10048 "PP: Parsing internal subset\n");
10049#endif
10050 ctxt->inSubset = 1;
10051 xmlParseDocTypeDecl(ctxt);
10052 if (RAW == '[') {
10053 ctxt->instate = XML_PARSER_DTD;
10054#ifdef DEBUG_PUSH
10055 xmlGenericError(xmlGenericErrorContext,
10056 "PP: entering DTD\n");
10057#endif
10058 } else {
10059 /*
10060 * Create and update the external subset.
10061 */
10062 ctxt->inSubset = 2;
10063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10064 (ctxt->sax->externalSubset != NULL))
10065 ctxt->sax->externalSubset(ctxt->userData,
10066 ctxt->intSubName, ctxt->extSubSystem,
10067 ctxt->extSubURI);
10068 ctxt->inSubset = 0;
10069 ctxt->instate = XML_PARSER_PROLOG;
10070#ifdef DEBUG_PUSH
10071 xmlGenericError(xmlGenericErrorContext,
10072 "PP: entering PROLOG\n");
10073#endif
10074 }
10075 } else if ((cur == '<') && (next == '!') &&
10076 (avail < 9)) {
10077 goto done;
10078 } else {
10079 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010080 ctxt->progressive = 1;
10081 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010082#ifdef DEBUG_PUSH
10083 xmlGenericError(xmlGenericErrorContext,
10084 "PP: entering START_TAG\n");
10085#endif
10086 }
10087 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010088 case XML_PARSER_PROLOG:
10089 SKIP_BLANKS;
10090 if (ctxt->input->buf == NULL)
10091 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10092 else
10093 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10094 if (avail < 2)
10095 goto done;
10096 cur = ctxt->input->cur[0];
10097 next = ctxt->input->cur[1];
10098 if ((cur == '<') && (next == '?')) {
10099 if ((!terminate) &&
10100 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10101 goto done;
10102#ifdef DEBUG_PUSH
10103 xmlGenericError(xmlGenericErrorContext,
10104 "PP: Parsing PI\n");
10105#endif
10106 xmlParsePI(ctxt);
10107 } else if ((cur == '<') && (next == '!') &&
10108 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10109 if ((!terminate) &&
10110 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10111 goto done;
10112#ifdef DEBUG_PUSH
10113 xmlGenericError(xmlGenericErrorContext,
10114 "PP: Parsing Comment\n");
10115#endif
10116 xmlParseComment(ctxt);
10117 ctxt->instate = XML_PARSER_PROLOG;
10118 } else if ((cur == '<') && (next == '!') &&
10119 (avail < 4)) {
10120 goto done;
10121 } else {
10122 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010123 if (ctxt->progressive == 0)
10124 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000010125 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010126#ifdef DEBUG_PUSH
10127 xmlGenericError(xmlGenericErrorContext,
10128 "PP: entering START_TAG\n");
10129#endif
10130 }
10131 break;
10132 case XML_PARSER_EPILOG:
10133 SKIP_BLANKS;
10134 if (ctxt->input->buf == NULL)
10135 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10136 else
10137 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10138 if (avail < 2)
10139 goto done;
10140 cur = ctxt->input->cur[0];
10141 next = ctxt->input->cur[1];
10142 if ((cur == '<') && (next == '?')) {
10143 if ((!terminate) &&
10144 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10145 goto done;
10146#ifdef DEBUG_PUSH
10147 xmlGenericError(xmlGenericErrorContext,
10148 "PP: Parsing PI\n");
10149#endif
10150 xmlParsePI(ctxt);
10151 ctxt->instate = XML_PARSER_EPILOG;
10152 } else if ((cur == '<') && (next == '!') &&
10153 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10154 if ((!terminate) &&
10155 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10156 goto done;
10157#ifdef DEBUG_PUSH
10158 xmlGenericError(xmlGenericErrorContext,
10159 "PP: Parsing Comment\n");
10160#endif
10161 xmlParseComment(ctxt);
10162 ctxt->instate = XML_PARSER_EPILOG;
10163 } else if ((cur == '<') && (next == '!') &&
10164 (avail < 4)) {
10165 goto done;
10166 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010167 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010168 ctxt->instate = XML_PARSER_EOF;
10169#ifdef DEBUG_PUSH
10170 xmlGenericError(xmlGenericErrorContext,
10171 "PP: entering EOF\n");
10172#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010173 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010174 ctxt->sax->endDocument(ctxt->userData);
10175 goto done;
10176 }
10177 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010178 case XML_PARSER_DTD: {
10179 /*
10180 * Sorry but progressive parsing of the internal subset
10181 * is not expected to be supported. We first check that
10182 * the full content of the internal subset is available and
10183 * the parsing is launched only at that point.
10184 * Internal subset ends up with "']' S? '>'" in an unescaped
10185 * section and not in a ']]>' sequence which are conditional
10186 * sections (whoever argued to keep that crap in XML deserve
10187 * a place in hell !).
10188 */
10189 int base, i;
10190 xmlChar *buf;
10191 xmlChar quote = 0;
10192
10193 base = ctxt->input->cur - ctxt->input->base;
10194 if (base < 0) return(0);
10195 if (ctxt->checkIndex > base)
10196 base = ctxt->checkIndex;
10197 buf = ctxt->input->buf->buffer->content;
10198 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10199 base++) {
10200 if (quote != 0) {
10201 if (buf[base] == quote)
10202 quote = 0;
10203 continue;
10204 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010205 if ((quote == 0) && (buf[base] == '<')) {
10206 int found = 0;
10207 /* special handling of comments */
10208 if (((unsigned int) base + 4 <
10209 ctxt->input->buf->buffer->use) &&
10210 (buf[base + 1] == '!') &&
10211 (buf[base + 2] == '-') &&
10212 (buf[base + 3] == '-')) {
10213 for (;(unsigned int) base + 3 <
10214 ctxt->input->buf->buffer->use; base++) {
10215 if ((buf[base] == '-') &&
10216 (buf[base + 1] == '-') &&
10217 (buf[base + 2] == '>')) {
10218 found = 1;
10219 base += 2;
10220 break;
10221 }
10222 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010223 if (!found) {
10224#if 0
10225 fprintf(stderr, "unfinished comment\n");
10226#endif
10227 break; /* for */
10228 }
Daniel Veillard036143b2004-02-12 11:57:52 +000010229 continue;
10230 }
10231 }
Owen Taylor3473f882001-02-23 17:55:21 +000010232 if (buf[base] == '"') {
10233 quote = '"';
10234 continue;
10235 }
10236 if (buf[base] == '\'') {
10237 quote = '\'';
10238 continue;
10239 }
10240 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010241#if 0
10242 fprintf(stderr, "%c%c%c%c: ", buf[base],
10243 buf[base + 1], buf[base + 2], buf[base + 3]);
10244#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010245 if ((unsigned int) base +1 >=
10246 ctxt->input->buf->buffer->use)
10247 break;
10248 if (buf[base + 1] == ']') {
10249 /* conditional crap, skip both ']' ! */
10250 base++;
10251 continue;
10252 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010253 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000010254 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10255 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010256 if (buf[base + i] == '>') {
10257#if 0
10258 fprintf(stderr, "found\n");
10259#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010260 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010261 }
10262 if (!IS_BLANK_CH(buf[base + i])) {
10263#if 0
10264 fprintf(stderr, "not found\n");
10265#endif
10266 goto not_end_of_int_subset;
10267 }
Owen Taylor3473f882001-02-23 17:55:21 +000010268 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010269#if 0
10270 fprintf(stderr, "end of stream\n");
10271#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010272 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010273
Owen Taylor3473f882001-02-23 17:55:21 +000010274 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000010275not_end_of_int_subset:
10276 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000010277 }
10278 /*
10279 * We didn't find the end of the Internal subset
10280 */
Owen Taylor3473f882001-02-23 17:55:21 +000010281#ifdef DEBUG_PUSH
10282 if (next == 0)
10283 xmlGenericError(xmlGenericErrorContext,
10284 "PP: lookup of int subset end filed\n");
10285#endif
10286 goto done;
10287
10288found_end_int_subset:
10289 xmlParseInternalSubset(ctxt);
10290 ctxt->inSubset = 2;
10291 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10292 (ctxt->sax->externalSubset != NULL))
10293 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10294 ctxt->extSubSystem, ctxt->extSubURI);
10295 ctxt->inSubset = 0;
10296 ctxt->instate = XML_PARSER_PROLOG;
10297 ctxt->checkIndex = 0;
10298#ifdef DEBUG_PUSH
10299 xmlGenericError(xmlGenericErrorContext,
10300 "PP: entering PROLOG\n");
10301#endif
10302 break;
10303 }
10304 case XML_PARSER_COMMENT:
10305 xmlGenericError(xmlGenericErrorContext,
10306 "PP: internal error, state == COMMENT\n");
10307 ctxt->instate = XML_PARSER_CONTENT;
10308#ifdef DEBUG_PUSH
10309 xmlGenericError(xmlGenericErrorContext,
10310 "PP: entering CONTENT\n");
10311#endif
10312 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010313 case XML_PARSER_IGNORE:
10314 xmlGenericError(xmlGenericErrorContext,
10315 "PP: internal error, state == IGNORE");
10316 ctxt->instate = XML_PARSER_DTD;
10317#ifdef DEBUG_PUSH
10318 xmlGenericError(xmlGenericErrorContext,
10319 "PP: entering DTD\n");
10320#endif
10321 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010322 case XML_PARSER_PI:
10323 xmlGenericError(xmlGenericErrorContext,
10324 "PP: internal error, state == PI\n");
10325 ctxt->instate = XML_PARSER_CONTENT;
10326#ifdef DEBUG_PUSH
10327 xmlGenericError(xmlGenericErrorContext,
10328 "PP: entering CONTENT\n");
10329#endif
10330 break;
10331 case XML_PARSER_ENTITY_DECL:
10332 xmlGenericError(xmlGenericErrorContext,
10333 "PP: internal error, state == ENTITY_DECL\n");
10334 ctxt->instate = XML_PARSER_DTD;
10335#ifdef DEBUG_PUSH
10336 xmlGenericError(xmlGenericErrorContext,
10337 "PP: entering DTD\n");
10338#endif
10339 break;
10340 case XML_PARSER_ENTITY_VALUE:
10341 xmlGenericError(xmlGenericErrorContext,
10342 "PP: internal error, state == ENTITY_VALUE\n");
10343 ctxt->instate = XML_PARSER_CONTENT;
10344#ifdef DEBUG_PUSH
10345 xmlGenericError(xmlGenericErrorContext,
10346 "PP: entering DTD\n");
10347#endif
10348 break;
10349 case XML_PARSER_ATTRIBUTE_VALUE:
10350 xmlGenericError(xmlGenericErrorContext,
10351 "PP: internal error, state == ATTRIBUTE_VALUE\n");
10352 ctxt->instate = XML_PARSER_START_TAG;
10353#ifdef DEBUG_PUSH
10354 xmlGenericError(xmlGenericErrorContext,
10355 "PP: entering START_TAG\n");
10356#endif
10357 break;
10358 case XML_PARSER_SYSTEM_LITERAL:
10359 xmlGenericError(xmlGenericErrorContext,
10360 "PP: internal error, state == SYSTEM_LITERAL\n");
10361 ctxt->instate = XML_PARSER_START_TAG;
10362#ifdef DEBUG_PUSH
10363 xmlGenericError(xmlGenericErrorContext,
10364 "PP: entering START_TAG\n");
10365#endif
10366 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010367 case XML_PARSER_PUBLIC_LITERAL:
10368 xmlGenericError(xmlGenericErrorContext,
10369 "PP: internal error, state == PUBLIC_LITERAL\n");
10370 ctxt->instate = XML_PARSER_START_TAG;
10371#ifdef DEBUG_PUSH
10372 xmlGenericError(xmlGenericErrorContext,
10373 "PP: entering START_TAG\n");
10374#endif
10375 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010376 }
10377 }
10378done:
10379#ifdef DEBUG_PUSH
10380 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10381#endif
10382 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010383encoding_error:
10384 {
10385 char buffer[150];
10386
10387 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
10388 ctxt->input->cur[0], ctxt->input->cur[1],
10389 ctxt->input->cur[2], ctxt->input->cur[3]);
10390 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
10391 "Input is not proper UTF-8, indicate encoding !\n%s",
10392 BAD_CAST buffer, NULL);
10393 }
10394 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000010395}
10396
10397/**
Owen Taylor3473f882001-02-23 17:55:21 +000010398 * xmlParseChunk:
10399 * @ctxt: an XML parser context
10400 * @chunk: an char array
10401 * @size: the size in byte of the chunk
10402 * @terminate: last chunk indicator
10403 *
10404 * Parse a Chunk of memory
10405 *
10406 * Returns zero if no error, the xmlParserErrors otherwise.
10407 */
10408int
10409xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10410 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010411 if (ctxt == NULL)
10412 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010413 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010414 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010415 if (ctxt->instate == XML_PARSER_START)
10416 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010417 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10418 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10419 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10420 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010421 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010422
William M. Bracka3215c72004-07-31 16:24:01 +000010423 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10424 if (res < 0) {
10425 ctxt->errNo = XML_PARSER_EOF;
10426 ctxt->disableSAX = 1;
10427 return (XML_PARSER_EOF);
10428 }
Owen Taylor3473f882001-02-23 17:55:21 +000010429 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10430 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010431 ctxt->input->end =
10432 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010433#ifdef DEBUG_PUSH
10434 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10435#endif
10436
Owen Taylor3473f882001-02-23 17:55:21 +000010437 } else if (ctxt->instate != XML_PARSER_EOF) {
10438 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10439 xmlParserInputBufferPtr in = ctxt->input->buf;
10440 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10441 (in->raw != NULL)) {
10442 int nbchars;
10443
10444 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10445 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010446 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010447 xmlGenericError(xmlGenericErrorContext,
10448 "xmlParseChunk: encoder error\n");
10449 return(XML_ERR_INVALID_ENCODING);
10450 }
10451 }
10452 }
10453 }
10454 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010455 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010456 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010457 if (terminate) {
10458 /*
10459 * Check for termination
10460 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010461 int avail = 0;
10462
10463 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010464 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010465 avail = ctxt->input->length -
10466 (ctxt->input->cur - ctxt->input->base);
10467 else
10468 avail = ctxt->input->buf->buffer->use -
10469 (ctxt->input->cur - ctxt->input->base);
10470 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010471
Owen Taylor3473f882001-02-23 17:55:21 +000010472 if ((ctxt->instate != XML_PARSER_EOF) &&
10473 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010476 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010477 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010478 }
Owen Taylor3473f882001-02-23 17:55:21 +000010479 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010480 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010481 ctxt->sax->endDocument(ctxt->userData);
10482 }
10483 ctxt->instate = XML_PARSER_EOF;
10484 }
10485 return((xmlParserErrors) ctxt->errNo);
10486}
10487
10488/************************************************************************
10489 * *
10490 * I/O front end functions to the parser *
10491 * *
10492 ************************************************************************/
10493
10494/**
Owen Taylor3473f882001-02-23 17:55:21 +000010495 * xmlCreatePushParserCtxt:
10496 * @sax: a SAX handler
10497 * @user_data: The user data returned on SAX callbacks
10498 * @chunk: a pointer to an array of chars
10499 * @size: number of chars in the array
10500 * @filename: an optional file name or URI
10501 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010502 * Create a parser context for using the XML parser in push mode.
10503 * If @buffer and @size are non-NULL, the data is used to detect
10504 * the encoding. The remaining characters will be parsed so they
10505 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010506 * To allow content encoding detection, @size should be >= 4
10507 * The value of @filename is used for fetching external entities
10508 * and error/warning reports.
10509 *
10510 * Returns the new parser context or NULL
10511 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010512
Owen Taylor3473f882001-02-23 17:55:21 +000010513xmlParserCtxtPtr
10514xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10515 const char *chunk, int size, const char *filename) {
10516 xmlParserCtxtPtr ctxt;
10517 xmlParserInputPtr inputStream;
10518 xmlParserInputBufferPtr buf;
10519 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10520
10521 /*
10522 * plug some encoding conversion routines
10523 */
10524 if ((chunk != NULL) && (size >= 4))
10525 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10526
10527 buf = xmlAllocParserInputBuffer(enc);
10528 if (buf == NULL) return(NULL);
10529
10530 ctxt = xmlNewParserCtxt();
10531 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010532 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010533 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 return(NULL);
10535 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010536 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010537 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10538 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010539 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010540 xmlFreeParserInputBuffer(buf);
10541 xmlFreeParserCtxt(ctxt);
10542 return(NULL);
10543 }
Owen Taylor3473f882001-02-23 17:55:21 +000010544 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010545#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010546 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010547#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010548 xmlFree(ctxt->sax);
10549 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10550 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010551 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010552 xmlFreeParserInputBuffer(buf);
10553 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010554 return(NULL);
10555 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010556 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10557 if (sax->initialized == XML_SAX2_MAGIC)
10558 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10559 else
10560 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010561 if (user_data != NULL)
10562 ctxt->userData = user_data;
10563 }
10564 if (filename == NULL) {
10565 ctxt->directory = NULL;
10566 } else {
10567 ctxt->directory = xmlParserGetDirectory(filename);
10568 }
10569
10570 inputStream = xmlNewInputStream(ctxt);
10571 if (inputStream == NULL) {
10572 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010573 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010574 return(NULL);
10575 }
10576
10577 if (filename == NULL)
10578 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010579 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010580 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010581 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010582 if (inputStream->filename == NULL) {
10583 xmlFreeParserCtxt(ctxt);
10584 xmlFreeParserInputBuffer(buf);
10585 return(NULL);
10586 }
10587 }
Owen Taylor3473f882001-02-23 17:55:21 +000010588 inputStream->buf = buf;
10589 inputStream->base = inputStream->buf->buffer->content;
10590 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010591 inputStream->end =
10592 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010593
10594 inputPush(ctxt, inputStream);
10595
William M. Brack3a1cd212005-02-11 14:35:54 +000010596 /*
10597 * If the caller didn't provide an initial 'chunk' for determining
10598 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10599 * that it can be automatically determined later
10600 */
10601 if ((size == 0) || (chunk == NULL)) {
10602 ctxt->charset = XML_CHAR_ENCODING_NONE;
10603 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010604 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10605 int cur = ctxt->input->cur - ctxt->input->base;
10606
Owen Taylor3473f882001-02-23 17:55:21 +000010607 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010608
10609 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10610 ctxt->input->cur = ctxt->input->base + cur;
10611 ctxt->input->end =
10612 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010613#ifdef DEBUG_PUSH
10614 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10615#endif
10616 }
10617
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010618 if (enc != XML_CHAR_ENCODING_NONE) {
10619 xmlSwitchEncoding(ctxt, enc);
10620 }
10621
Owen Taylor3473f882001-02-23 17:55:21 +000010622 return(ctxt);
10623}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010624#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010625
10626/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000010627 * xmlStopParser:
10628 * @ctxt: an XML parser context
10629 *
10630 * Blocks further parser processing
10631 */
10632void
10633xmlStopParser(xmlParserCtxtPtr ctxt) {
10634 if (ctxt == NULL)
10635 return;
10636 ctxt->instate = XML_PARSER_EOF;
10637 ctxt->disableSAX = 1;
10638 if (ctxt->input != NULL) {
10639 ctxt->input->cur = BAD_CAST"";
10640 ctxt->input->base = ctxt->input->cur;
10641 }
10642}
10643
10644/**
Owen Taylor3473f882001-02-23 17:55:21 +000010645 * xmlCreateIOParserCtxt:
10646 * @sax: a SAX handler
10647 * @user_data: The user data returned on SAX callbacks
10648 * @ioread: an I/O read function
10649 * @ioclose: an I/O close function
10650 * @ioctx: an I/O handler
10651 * @enc: the charset encoding if known
10652 *
10653 * Create a parser context for using the XML parser with an existing
10654 * I/O stream
10655 *
10656 * Returns the new parser context or NULL
10657 */
10658xmlParserCtxtPtr
10659xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10660 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10661 void *ioctx, xmlCharEncoding enc) {
10662 xmlParserCtxtPtr ctxt;
10663 xmlParserInputPtr inputStream;
10664 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010665
10666 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010667
10668 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10669 if (buf == NULL) return(NULL);
10670
10671 ctxt = xmlNewParserCtxt();
10672 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010673 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010674 return(NULL);
10675 }
10676 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010677#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010678 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010679#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010680 xmlFree(ctxt->sax);
10681 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10682 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010683 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000010684 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010685 return(NULL);
10686 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010687 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10688 if (sax->initialized == XML_SAX2_MAGIC)
10689 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10690 else
10691 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010692 if (user_data != NULL)
10693 ctxt->userData = user_data;
10694 }
10695
10696 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10697 if (inputStream == NULL) {
10698 xmlFreeParserCtxt(ctxt);
10699 return(NULL);
10700 }
10701 inputPush(ctxt, inputStream);
10702
10703 return(ctxt);
10704}
10705
Daniel Veillard4432df22003-09-28 18:58:27 +000010706#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010707/************************************************************************
10708 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010709 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010710 * *
10711 ************************************************************************/
10712
10713/**
10714 * xmlIOParseDTD:
10715 * @sax: the SAX handler block or NULL
10716 * @input: an Input Buffer
10717 * @enc: the charset encoding if known
10718 *
10719 * Load and parse a DTD
10720 *
10721 * Returns the resulting xmlDtdPtr or NULL in case of error.
10722 * @input will be freed at parsing end.
10723 */
10724
10725xmlDtdPtr
10726xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10727 xmlCharEncoding enc) {
10728 xmlDtdPtr ret = NULL;
10729 xmlParserCtxtPtr ctxt;
10730 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010731 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010732
10733 if (input == NULL)
10734 return(NULL);
10735
10736 ctxt = xmlNewParserCtxt();
10737 if (ctxt == NULL) {
10738 return(NULL);
10739 }
10740
10741 /*
10742 * Set-up the SAX context
10743 */
10744 if (sax != NULL) {
10745 if (ctxt->sax != NULL)
10746 xmlFree(ctxt->sax);
10747 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010748 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010749 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010750 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010751
10752 /*
10753 * generate a parser input from the I/O handler
10754 */
10755
Daniel Veillard43caefb2003-12-07 19:32:22 +000010756 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010757 if (pinput == NULL) {
10758 if (sax != NULL) ctxt->sax = NULL;
10759 xmlFreeParserCtxt(ctxt);
10760 return(NULL);
10761 }
10762
10763 /*
10764 * plug some encoding conversion routines here.
10765 */
10766 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010767 if (enc != XML_CHAR_ENCODING_NONE) {
10768 xmlSwitchEncoding(ctxt, enc);
10769 }
Owen Taylor3473f882001-02-23 17:55:21 +000010770
10771 pinput->filename = NULL;
10772 pinput->line = 1;
10773 pinput->col = 1;
10774 pinput->base = ctxt->input->cur;
10775 pinput->cur = ctxt->input->cur;
10776 pinput->free = NULL;
10777
10778 /*
10779 * let's parse that entity knowing it's an external subset.
10780 */
10781 ctxt->inSubset = 2;
10782 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10783 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10784 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010785
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010786 if ((enc == XML_CHAR_ENCODING_NONE) &&
10787 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010788 /*
10789 * Get the 4 first bytes and decode the charset
10790 * if enc != XML_CHAR_ENCODING_NONE
10791 * plug some encoding conversion routines.
10792 */
10793 start[0] = RAW;
10794 start[1] = NXT(1);
10795 start[2] = NXT(2);
10796 start[3] = NXT(3);
10797 enc = xmlDetectCharEncoding(start, 4);
10798 if (enc != XML_CHAR_ENCODING_NONE) {
10799 xmlSwitchEncoding(ctxt, enc);
10800 }
10801 }
10802
Owen Taylor3473f882001-02-23 17:55:21 +000010803 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10804
10805 if (ctxt->myDoc != NULL) {
10806 if (ctxt->wellFormed) {
10807 ret = ctxt->myDoc->extSubset;
10808 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010809 if (ret != NULL) {
10810 xmlNodePtr tmp;
10811
10812 ret->doc = NULL;
10813 tmp = ret->children;
10814 while (tmp != NULL) {
10815 tmp->doc = NULL;
10816 tmp = tmp->next;
10817 }
10818 }
Owen Taylor3473f882001-02-23 17:55:21 +000010819 } else {
10820 ret = NULL;
10821 }
10822 xmlFreeDoc(ctxt->myDoc);
10823 ctxt->myDoc = NULL;
10824 }
10825 if (sax != NULL) ctxt->sax = NULL;
10826 xmlFreeParserCtxt(ctxt);
10827
10828 return(ret);
10829}
10830
10831/**
10832 * xmlSAXParseDTD:
10833 * @sax: the SAX handler block
10834 * @ExternalID: a NAME* containing the External ID of the DTD
10835 * @SystemID: a NAME* containing the URL to the DTD
10836 *
10837 * Load and parse an external subset.
10838 *
10839 * Returns the resulting xmlDtdPtr or NULL in case of error.
10840 */
10841
10842xmlDtdPtr
10843xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10844 const xmlChar *SystemID) {
10845 xmlDtdPtr ret = NULL;
10846 xmlParserCtxtPtr ctxt;
10847 xmlParserInputPtr input = NULL;
10848 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010849 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010850
10851 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10852
10853 ctxt = xmlNewParserCtxt();
10854 if (ctxt == NULL) {
10855 return(NULL);
10856 }
10857
10858 /*
10859 * Set-up the SAX context
10860 */
10861 if (sax != NULL) {
10862 if (ctxt->sax != NULL)
10863 xmlFree(ctxt->sax);
10864 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010865 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010866 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010867
10868 /*
10869 * Canonicalise the system ID
10870 */
10871 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010872 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010873 xmlFreeParserCtxt(ctxt);
10874 return(NULL);
10875 }
Owen Taylor3473f882001-02-23 17:55:21 +000010876
10877 /*
10878 * Ask the Entity resolver to load the damn thing
10879 */
10880
10881 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010882 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010883 if (input == NULL) {
10884 if (sax != NULL) ctxt->sax = NULL;
10885 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010886 if (systemIdCanonic != NULL)
10887 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010888 return(NULL);
10889 }
10890
10891 /*
10892 * plug some encoding conversion routines here.
10893 */
10894 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010895 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10896 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10897 xmlSwitchEncoding(ctxt, enc);
10898 }
Owen Taylor3473f882001-02-23 17:55:21 +000010899
10900 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010901 input->filename = (char *) systemIdCanonic;
10902 else
10903 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010904 input->line = 1;
10905 input->col = 1;
10906 input->base = ctxt->input->cur;
10907 input->cur = ctxt->input->cur;
10908 input->free = NULL;
10909
10910 /*
10911 * let's parse that entity knowing it's an external subset.
10912 */
10913 ctxt->inSubset = 2;
10914 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10915 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10916 ExternalID, SystemID);
10917 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10918
10919 if (ctxt->myDoc != NULL) {
10920 if (ctxt->wellFormed) {
10921 ret = ctxt->myDoc->extSubset;
10922 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010923 if (ret != NULL) {
10924 xmlNodePtr tmp;
10925
10926 ret->doc = NULL;
10927 tmp = ret->children;
10928 while (tmp != NULL) {
10929 tmp->doc = NULL;
10930 tmp = tmp->next;
10931 }
10932 }
Owen Taylor3473f882001-02-23 17:55:21 +000010933 } else {
10934 ret = NULL;
10935 }
10936 xmlFreeDoc(ctxt->myDoc);
10937 ctxt->myDoc = NULL;
10938 }
10939 if (sax != NULL) ctxt->sax = NULL;
10940 xmlFreeParserCtxt(ctxt);
10941
10942 return(ret);
10943}
10944
Daniel Veillard4432df22003-09-28 18:58:27 +000010945
Owen Taylor3473f882001-02-23 17:55:21 +000010946/**
10947 * xmlParseDTD:
10948 * @ExternalID: a NAME* containing the External ID of the DTD
10949 * @SystemID: a NAME* containing the URL to the DTD
10950 *
10951 * Load and parse an external subset.
10952 *
10953 * Returns the resulting xmlDtdPtr or NULL in case of error.
10954 */
10955
10956xmlDtdPtr
10957xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10958 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10959}
Daniel Veillard4432df22003-09-28 18:58:27 +000010960#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010961
10962/************************************************************************
10963 * *
10964 * Front ends when parsing an Entity *
10965 * *
10966 ************************************************************************/
10967
10968/**
Owen Taylor3473f882001-02-23 17:55:21 +000010969 * xmlParseCtxtExternalEntity:
10970 * @ctx: the existing parsing context
10971 * @URL: the URL for the entity to load
10972 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010973 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010974 *
10975 * Parse an external general entity within an existing parsing context
10976 * An external general parsed entity is well-formed if it matches the
10977 * production labeled extParsedEnt.
10978 *
10979 * [78] extParsedEnt ::= TextDecl? content
10980 *
10981 * Returns 0 if the entity is well formed, -1 in case of args problem and
10982 * the parser error code otherwise
10983 */
10984
10985int
10986xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010987 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010988 xmlParserCtxtPtr ctxt;
10989 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010990 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010991 xmlSAXHandlerPtr oldsax = NULL;
10992 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010993 xmlChar start[4];
10994 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010995
Daniel Veillardce682bc2004-11-05 17:22:25 +000010996 if (ctx == NULL) return(-1);
10997
Owen Taylor3473f882001-02-23 17:55:21 +000010998 if (ctx->depth > 40) {
10999 return(XML_ERR_ENTITY_LOOP);
11000 }
11001
Daniel Veillardcda96922001-08-21 10:56:31 +000011002 if (lst != NULL)
11003 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011004 if ((URL == NULL) && (ID == NULL))
11005 return(-1);
11006 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11007 return(-1);
11008
11009
11010 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
11011 if (ctxt == NULL) return(-1);
11012 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011013 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000011014 oldsax = ctxt->sax;
11015 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011016 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011017 newDoc = xmlNewDoc(BAD_CAST "1.0");
11018 if (newDoc == NULL) {
11019 xmlFreeParserCtxt(ctxt);
11020 return(-1);
11021 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011022 if (ctx->myDoc->dict) {
11023 newDoc->dict = ctx->myDoc->dict;
11024 xmlDictReference(newDoc->dict);
11025 }
Owen Taylor3473f882001-02-23 17:55:21 +000011026 if (ctx->myDoc != NULL) {
11027 newDoc->intSubset = ctx->myDoc->intSubset;
11028 newDoc->extSubset = ctx->myDoc->extSubset;
11029 }
11030 if (ctx->myDoc->URL != NULL) {
11031 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11032 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011033 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11034 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011035 ctxt->sax = oldsax;
11036 xmlFreeParserCtxt(ctxt);
11037 newDoc->intSubset = NULL;
11038 newDoc->extSubset = NULL;
11039 xmlFreeDoc(newDoc);
11040 return(-1);
11041 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011042 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011043 nodePush(ctxt, newDoc->children);
11044 if (ctx->myDoc == NULL) {
11045 ctxt->myDoc = newDoc;
11046 } else {
11047 ctxt->myDoc = ctx->myDoc;
11048 newDoc->children->doc = ctx->myDoc;
11049 }
11050
Daniel Veillard87a764e2001-06-20 17:41:10 +000011051 /*
11052 * Get the 4 first bytes and decode the charset
11053 * if enc != XML_CHAR_ENCODING_NONE
11054 * plug some encoding conversion routines.
11055 */
11056 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011057 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11058 start[0] = RAW;
11059 start[1] = NXT(1);
11060 start[2] = NXT(2);
11061 start[3] = NXT(3);
11062 enc = xmlDetectCharEncoding(start, 4);
11063 if (enc != XML_CHAR_ENCODING_NONE) {
11064 xmlSwitchEncoding(ctxt, enc);
11065 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011066 }
11067
Owen Taylor3473f882001-02-23 17:55:21 +000011068 /*
11069 * Parse a possible text declaration first
11070 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011071 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011072 xmlParseTextDecl(ctxt);
11073 }
11074
11075 /*
11076 * Doing validity checking on chunk doesn't make sense
11077 */
11078 ctxt->instate = XML_PARSER_CONTENT;
11079 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011080 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011081 ctxt->loadsubset = ctx->loadsubset;
11082 ctxt->depth = ctx->depth + 1;
11083 ctxt->replaceEntities = ctx->replaceEntities;
11084 if (ctxt->validate) {
11085 ctxt->vctxt.error = ctx->vctxt.error;
11086 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000011087 } else {
11088 ctxt->vctxt.error = NULL;
11089 ctxt->vctxt.warning = NULL;
11090 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000011091 ctxt->vctxt.nodeTab = NULL;
11092 ctxt->vctxt.nodeNr = 0;
11093 ctxt->vctxt.nodeMax = 0;
11094 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011095 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11096 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011097 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11098 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11099 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011100 ctxt->dictNames = ctx->dictNames;
11101 ctxt->attsDefault = ctx->attsDefault;
11102 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000011103 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000011104
11105 xmlParseContent(ctxt);
11106
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000011107 ctx->validate = ctxt->validate;
11108 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000011109 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011110 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011111 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011112 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011113 }
11114 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011115 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011116 }
11117
11118 if (!ctxt->wellFormed) {
11119 if (ctxt->errNo == 0)
11120 ret = 1;
11121 else
11122 ret = ctxt->errNo;
11123 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000011124 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011125 xmlNodePtr cur;
11126
11127 /*
11128 * Return the newly created nodeset after unlinking it from
11129 * they pseudo parent.
11130 */
11131 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000011132 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011133 while (cur != NULL) {
11134 cur->parent = NULL;
11135 cur = cur->next;
11136 }
11137 newDoc->children->children = NULL;
11138 }
11139 ret = 0;
11140 }
11141 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000011142 ctxt->dict = NULL;
11143 ctxt->attsDefault = NULL;
11144 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011145 xmlFreeParserCtxt(ctxt);
11146 newDoc->intSubset = NULL;
11147 newDoc->extSubset = NULL;
11148 xmlFreeDoc(newDoc);
11149
11150 return(ret);
11151}
11152
11153/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011154 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000011155 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011156 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000011157 * @sax: the SAX handler bloc (possibly NULL)
11158 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11159 * @depth: Used for loop detection, use 0
11160 * @URL: the URL for the entity to load
11161 * @ID: the System ID for the entity to load
11162 * @list: the return value for the set of parsed nodes
11163 *
Daniel Veillard257d9102001-05-08 10:41:44 +000011164 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000011165 *
11166 * Returns 0 if the entity is well formed, -1 in case of args problem and
11167 * the parser error code otherwise
11168 */
11169
Daniel Veillard7d515752003-09-26 19:12:37 +000011170static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011171xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
11172 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000011173 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011174 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000011175 xmlParserCtxtPtr ctxt;
11176 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011177 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011178 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000011179 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011180 xmlChar start[4];
11181 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000011182
11183 if (depth > 40) {
11184 return(XML_ERR_ENTITY_LOOP);
11185 }
11186
11187
11188
11189 if (list != NULL)
11190 *list = NULL;
11191 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000011192 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011193 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000011194 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011195
11196
11197 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000011198 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000011199 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011200 if (oldctxt != NULL) {
11201 ctxt->_private = oldctxt->_private;
11202 ctxt->loadsubset = oldctxt->loadsubset;
11203 ctxt->validate = oldctxt->validate;
11204 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011205 ctxt->record_info = oldctxt->record_info;
11206 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
11207 ctxt->node_seq.length = oldctxt->node_seq.length;
11208 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011209 } else {
11210 /*
11211 * Doing validity checking on chunk without context
11212 * doesn't make sense
11213 */
11214 ctxt->_private = NULL;
11215 ctxt->validate = 0;
11216 ctxt->external = 2;
11217 ctxt->loadsubset = 0;
11218 }
Owen Taylor3473f882001-02-23 17:55:21 +000011219 if (sax != NULL) {
11220 oldsax = ctxt->sax;
11221 ctxt->sax = sax;
11222 if (user_data != NULL)
11223 ctxt->userData = user_data;
11224 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011225 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011226 newDoc = xmlNewDoc(BAD_CAST "1.0");
11227 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011228 ctxt->node_seq.maximum = 0;
11229 ctxt->node_seq.length = 0;
11230 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011231 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000011232 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011233 }
11234 if (doc != NULL) {
11235 newDoc->intSubset = doc->intSubset;
11236 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011237 newDoc->dict = doc->dict;
11238 } else if (oldctxt != NULL) {
11239 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000011240 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011241 xmlDictReference(newDoc->dict);
11242
Owen Taylor3473f882001-02-23 17:55:21 +000011243 if (doc->URL != NULL) {
11244 newDoc->URL = xmlStrdup(doc->URL);
11245 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011246 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11247 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011248 if (sax != NULL)
11249 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011250 ctxt->node_seq.maximum = 0;
11251 ctxt->node_seq.length = 0;
11252 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011253 xmlFreeParserCtxt(ctxt);
11254 newDoc->intSubset = NULL;
11255 newDoc->extSubset = NULL;
11256 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000011257 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000011258 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011259 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011260 nodePush(ctxt, newDoc->children);
11261 if (doc == NULL) {
11262 ctxt->myDoc = newDoc;
11263 } else {
11264 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011265 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000011266 }
11267
Daniel Veillard87a764e2001-06-20 17:41:10 +000011268 /*
11269 * Get the 4 first bytes and decode the charset
11270 * if enc != XML_CHAR_ENCODING_NONE
11271 * plug some encoding conversion routines.
11272 */
11273 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011274 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11275 start[0] = RAW;
11276 start[1] = NXT(1);
11277 start[2] = NXT(2);
11278 start[3] = NXT(3);
11279 enc = xmlDetectCharEncoding(start, 4);
11280 if (enc != XML_CHAR_ENCODING_NONE) {
11281 xmlSwitchEncoding(ctxt, enc);
11282 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000011283 }
11284
Owen Taylor3473f882001-02-23 17:55:21 +000011285 /*
11286 * Parse a possible text declaration first
11287 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000011288 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011289 xmlParseTextDecl(ctxt);
11290 }
11291
Owen Taylor3473f882001-02-23 17:55:21 +000011292 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011293 ctxt->depth = depth;
11294
11295 xmlParseContent(ctxt);
11296
Daniel Veillard561b7f82002-03-20 21:55:57 +000011297 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011298 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000011299 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011300 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011301 }
11302 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011303 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011304 }
11305
11306 if (!ctxt->wellFormed) {
11307 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011308 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000011309 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011310 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000011311 } else {
11312 if (list != NULL) {
11313 xmlNodePtr cur;
11314
11315 /*
11316 * Return the newly created nodeset after unlinking it from
11317 * they pseudo parent.
11318 */
11319 cur = newDoc->children->children;
11320 *list = cur;
11321 while (cur != NULL) {
11322 cur->parent = NULL;
11323 cur = cur->next;
11324 }
11325 newDoc->children->children = NULL;
11326 }
Daniel Veillard7d515752003-09-26 19:12:37 +000011327 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000011328 }
11329 if (sax != NULL)
11330 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000011331 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
11332 oldctxt->node_seq.length = ctxt->node_seq.length;
11333 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000011334 ctxt->node_seq.maximum = 0;
11335 ctxt->node_seq.length = 0;
11336 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011337 xmlFreeParserCtxt(ctxt);
11338 newDoc->intSubset = NULL;
11339 newDoc->extSubset = NULL;
11340 xmlFreeDoc(newDoc);
11341
11342 return(ret);
11343}
11344
Daniel Veillard81273902003-09-30 00:43:48 +000011345#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011346/**
Daniel Veillard257d9102001-05-08 10:41:44 +000011347 * xmlParseExternalEntity:
11348 * @doc: the document the chunk pertains to
11349 * @sax: the SAX handler bloc (possibly NULL)
11350 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11351 * @depth: Used for loop detection, use 0
11352 * @URL: the URL for the entity to load
11353 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011354 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000011355 *
11356 * Parse an external general entity
11357 * An external general parsed entity is well-formed if it matches the
11358 * production labeled extParsedEnt.
11359 *
11360 * [78] extParsedEnt ::= TextDecl? content
11361 *
11362 * Returns 0 if the entity is well formed, -1 in case of args problem and
11363 * the parser error code otherwise
11364 */
11365
11366int
11367xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000011368 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000011369 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011370 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000011371}
11372
11373/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011374 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011375 * @doc: the document the chunk pertains to
11376 * @sax: the SAX handler bloc (possibly NULL)
11377 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11378 * @depth: Used for loop detection, use 0
11379 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011380 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011381 *
11382 * Parse a well-balanced chunk of an XML document
11383 * called by the parser
11384 * The allowed sequence for the Well Balanced Chunk is the one defined by
11385 * the content production in the XML grammar:
11386 *
11387 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11388 *
11389 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11390 * the parser error code otherwise
11391 */
11392
11393int
11394xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011395 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011396 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11397 depth, string, lst, 0 );
11398}
Daniel Veillard81273902003-09-30 00:43:48 +000011399#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011400
11401/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011402 * xmlParseBalancedChunkMemoryInternal:
11403 * @oldctxt: the existing parsing context
11404 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11405 * @user_data: the user data field for the parser context
11406 * @lst: the return value for the set of parsed nodes
11407 *
11408 *
11409 * Parse a well-balanced chunk of an XML document
11410 * called by the parser
11411 * The allowed sequence for the Well Balanced Chunk is the one defined by
11412 * the content production in the XML grammar:
11413 *
11414 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11415 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011416 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11417 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011418 *
11419 * In case recover is set to 1, the nodelist will not be empty even if
11420 * the parsed chunk is not well balanced.
11421 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011422static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011423xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11424 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11425 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011426 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011427 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011428 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011429 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011430 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011431 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011432 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011433
11434 if (oldctxt->depth > 40) {
11435 return(XML_ERR_ENTITY_LOOP);
11436 }
11437
11438
11439 if (lst != NULL)
11440 *lst = NULL;
11441 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011442 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011443
11444 size = xmlStrlen(string);
11445
11446 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011447 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011448 if (user_data != NULL)
11449 ctxt->userData = user_data;
11450 else
11451 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011452 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11453 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011454 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11455 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11456 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011457
11458 oldsax = ctxt->sax;
11459 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011460 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011461 ctxt->replaceEntities = oldctxt->replaceEntities;
11462 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011463
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011464 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011465 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011466 newDoc = xmlNewDoc(BAD_CAST "1.0");
11467 if (newDoc == NULL) {
11468 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011469 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011470 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011471 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011472 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011473 newDoc->dict = ctxt->dict;
11474 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011475 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011476 } else {
11477 ctxt->myDoc = oldctxt->myDoc;
11478 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011479 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011480 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011481 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11482 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011483 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011484 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011485 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011486 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011487 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011488 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011489 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011490 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011491 ctxt->myDoc->children = NULL;
11492 ctxt->myDoc->last = NULL;
11493 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011494 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011495 ctxt->instate = XML_PARSER_CONTENT;
11496 ctxt->depth = oldctxt->depth + 1;
11497
Daniel Veillard328f48c2002-11-15 15:24:34 +000011498 ctxt->validate = 0;
11499 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011500 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11501 /*
11502 * ID/IDREF registration will be done in xmlValidateElement below
11503 */
11504 ctxt->loadsubset |= XML_SKIP_IDS;
11505 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011506 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011507 ctxt->attsDefault = oldctxt->attsDefault;
11508 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011509
Daniel Veillard68e9e742002-11-16 15:35:11 +000011510 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011511 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011512 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011513 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011514 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011515 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011516 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011517 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011518 }
11519
11520 if (!ctxt->wellFormed) {
11521 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011522 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011523 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011524 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011525 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011526 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011527 }
11528
William M. Brack7b9154b2003-09-27 19:23:50 +000011529 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011530 xmlNodePtr cur;
11531
11532 /*
11533 * Return the newly created nodeset after unlinking it from
11534 * they pseudo parent.
11535 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011536 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011537 *lst = cur;
11538 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011539#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000011540 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
11541 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
11542 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000011543 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11544 oldctxt->myDoc, cur);
11545 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011546#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011547 cur->parent = NULL;
11548 cur = cur->next;
11549 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011550 ctxt->myDoc->children->children = NULL;
11551 }
11552 if (ctxt->myDoc != NULL) {
11553 xmlFreeNode(ctxt->myDoc->children);
11554 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011555 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011556 }
11557
11558 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011559 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011560 ctxt->attsDefault = NULL;
11561 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011562 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011563 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011564 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011565 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011566
11567 return(ret);
11568}
11569
Daniel Veillard29b17482004-08-16 00:39:03 +000011570/**
11571 * xmlParseInNodeContext:
11572 * @node: the context node
11573 * @data: the input string
11574 * @datalen: the input string length in bytes
11575 * @options: a combination of xmlParserOption
11576 * @lst: the return value for the set of parsed nodes
11577 *
11578 * Parse a well-balanced chunk of an XML document
11579 * within the context (DTD, namespaces, etc ...) of the given node.
11580 *
11581 * The allowed sequence for the data is a Well Balanced Chunk defined by
11582 * the content production in the XML grammar:
11583 *
11584 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11585 *
11586 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11587 * error code otherwise
11588 */
11589xmlParserErrors
11590xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11591 int options, xmlNodePtr *lst) {
11592#ifdef SAX2
11593 xmlParserCtxtPtr ctxt;
11594 xmlDocPtr doc = NULL;
11595 xmlNodePtr fake, cur;
11596 int nsnr = 0;
11597
11598 xmlParserErrors ret = XML_ERR_OK;
11599
11600 /*
11601 * check all input parameters, grab the document
11602 */
11603 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11604 return(XML_ERR_INTERNAL_ERROR);
11605 switch (node->type) {
11606 case XML_ELEMENT_NODE:
11607 case XML_ATTRIBUTE_NODE:
11608 case XML_TEXT_NODE:
11609 case XML_CDATA_SECTION_NODE:
11610 case XML_ENTITY_REF_NODE:
11611 case XML_PI_NODE:
11612 case XML_COMMENT_NODE:
11613 case XML_DOCUMENT_NODE:
11614 case XML_HTML_DOCUMENT_NODE:
11615 break;
11616 default:
11617 return(XML_ERR_INTERNAL_ERROR);
11618
11619 }
11620 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11621 (node->type != XML_DOCUMENT_NODE) &&
11622 (node->type != XML_HTML_DOCUMENT_NODE))
11623 node = node->parent;
11624 if (node == NULL)
11625 return(XML_ERR_INTERNAL_ERROR);
11626 if (node->type == XML_ELEMENT_NODE)
11627 doc = node->doc;
11628 else
11629 doc = (xmlDocPtr) node;
11630 if (doc == NULL)
11631 return(XML_ERR_INTERNAL_ERROR);
11632
11633 /*
11634 * allocate a context and set-up everything not related to the
11635 * node position in the tree
11636 */
11637 if (doc->type == XML_DOCUMENT_NODE)
11638 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11639#ifdef LIBXML_HTML_ENABLED
11640 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11641 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11642#endif
11643 else
11644 return(XML_ERR_INTERNAL_ERROR);
11645
11646 if (ctxt == NULL)
11647 return(XML_ERR_NO_MEMORY);
11648 fake = xmlNewComment(NULL);
11649 if (fake == NULL) {
11650 xmlFreeParserCtxt(ctxt);
11651 return(XML_ERR_NO_MEMORY);
11652 }
11653 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011654
11655 /*
11656 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11657 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11658 * we must wait until the last moment to free the original one.
11659 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011660 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011661 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011662 xmlDictFree(ctxt->dict);
11663 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011664 } else
11665 options |= XML_PARSE_NODICT;
11666
11667 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011668 xmlDetectSAX2(ctxt);
11669 ctxt->myDoc = doc;
11670
11671 if (node->type == XML_ELEMENT_NODE) {
11672 nodePush(ctxt, node);
11673 /*
11674 * initialize the SAX2 namespaces stack
11675 */
11676 cur = node;
11677 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11678 xmlNsPtr ns = cur->nsDef;
11679 const xmlChar *iprefix, *ihref;
11680
11681 while (ns != NULL) {
11682 if (ctxt->dict) {
11683 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11684 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11685 } else {
11686 iprefix = ns->prefix;
11687 ihref = ns->href;
11688 }
11689
11690 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11691 nsPush(ctxt, iprefix, ihref);
11692 nsnr++;
11693 }
11694 ns = ns->next;
11695 }
11696 cur = cur->parent;
11697 }
11698 ctxt->instate = XML_PARSER_CONTENT;
11699 }
11700
11701 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11702 /*
11703 * ID/IDREF registration will be done in xmlValidateElement below
11704 */
11705 ctxt->loadsubset |= XML_SKIP_IDS;
11706 }
11707
11708 xmlParseContent(ctxt);
11709 nsPop(ctxt, nsnr);
11710 if ((RAW == '<') && (NXT(1) == '/')) {
11711 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11712 } else if (RAW != 0) {
11713 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11714 }
11715 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11716 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11717 ctxt->wellFormed = 0;
11718 }
11719
11720 if (!ctxt->wellFormed) {
11721 if (ctxt->errNo == 0)
11722 ret = XML_ERR_INTERNAL_ERROR;
11723 else
11724 ret = (xmlParserErrors)ctxt->errNo;
11725 } else {
11726 ret = XML_ERR_OK;
11727 }
11728
11729 /*
11730 * Return the newly created nodeset after unlinking it from
11731 * the pseudo sibling.
11732 */
11733
11734 cur = fake->next;
11735 fake->next = NULL;
11736 node->last = fake;
11737
11738 if (cur != NULL) {
11739 cur->prev = NULL;
11740 }
11741
11742 *lst = cur;
11743
11744 while (cur != NULL) {
11745 cur->parent = NULL;
11746 cur = cur->next;
11747 }
11748
11749 xmlUnlinkNode(fake);
11750 xmlFreeNode(fake);
11751
11752
11753 if (ret != XML_ERR_OK) {
11754 xmlFreeNodeList(*lst);
11755 *lst = NULL;
11756 }
William M. Brackc3f81342004-10-03 01:22:44 +000011757
William M. Brackb7b54de2004-10-06 16:38:01 +000011758 if (doc->dict != NULL)
11759 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011760 xmlFreeParserCtxt(ctxt);
11761
11762 return(ret);
11763#else /* !SAX2 */
11764 return(XML_ERR_INTERNAL_ERROR);
11765#endif
11766}
11767
Daniel Veillard81273902003-09-30 00:43:48 +000011768#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011769/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011770 * xmlParseBalancedChunkMemoryRecover:
11771 * @doc: the document the chunk pertains to
11772 * @sax: the SAX handler bloc (possibly NULL)
11773 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11774 * @depth: Used for loop detection, use 0
11775 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11776 * @lst: the return value for the set of parsed nodes
11777 * @recover: return nodes even if the data is broken (use 0)
11778 *
11779 *
11780 * Parse a well-balanced chunk of an XML document
11781 * called by the parser
11782 * The allowed sequence for the Well Balanced Chunk is the one defined by
11783 * the content production in the XML grammar:
11784 *
11785 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11786 *
11787 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11788 * the parser error code otherwise
11789 *
11790 * In case recover is set to 1, the nodelist will not be empty even if
11791 * the parsed chunk is not well balanced.
11792 */
11793int
11794xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11795 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11796 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011797 xmlParserCtxtPtr ctxt;
11798 xmlDocPtr newDoc;
11799 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011800 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011801 int size;
11802 int ret = 0;
11803
11804 if (depth > 40) {
11805 return(XML_ERR_ENTITY_LOOP);
11806 }
11807
11808
Daniel Veillardcda96922001-08-21 10:56:31 +000011809 if (lst != NULL)
11810 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011811 if (string == NULL)
11812 return(-1);
11813
11814 size = xmlStrlen(string);
11815
11816 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11817 if (ctxt == NULL) return(-1);
11818 ctxt->userData = ctxt;
11819 if (sax != NULL) {
11820 oldsax = ctxt->sax;
11821 ctxt->sax = sax;
11822 if (user_data != NULL)
11823 ctxt->userData = user_data;
11824 }
11825 newDoc = xmlNewDoc(BAD_CAST "1.0");
11826 if (newDoc == NULL) {
11827 xmlFreeParserCtxt(ctxt);
11828 return(-1);
11829 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011830 if ((doc != NULL) && (doc->dict != NULL)) {
11831 xmlDictFree(ctxt->dict);
11832 ctxt->dict = doc->dict;
11833 xmlDictReference(ctxt->dict);
11834 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11835 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11836 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11837 ctxt->dictNames = 1;
11838 } else {
11839 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11840 }
Owen Taylor3473f882001-02-23 17:55:21 +000011841 if (doc != NULL) {
11842 newDoc->intSubset = doc->intSubset;
11843 newDoc->extSubset = doc->extSubset;
11844 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011845 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11846 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011847 if (sax != NULL)
11848 ctxt->sax = oldsax;
11849 xmlFreeParserCtxt(ctxt);
11850 newDoc->intSubset = NULL;
11851 newDoc->extSubset = NULL;
11852 xmlFreeDoc(newDoc);
11853 return(-1);
11854 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011855 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11856 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011857 if (doc == NULL) {
11858 ctxt->myDoc = newDoc;
11859 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011860 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011861 newDoc->children->doc = doc;
11862 }
11863 ctxt->instate = XML_PARSER_CONTENT;
11864 ctxt->depth = depth;
11865
11866 /*
11867 * Doing validity checking on chunk doesn't make sense
11868 */
11869 ctxt->validate = 0;
11870 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011871 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011872
Daniel Veillardb39bc392002-10-26 19:29:51 +000011873 if ( doc != NULL ){
11874 content = doc->children;
11875 doc->children = NULL;
11876 xmlParseContent(ctxt);
11877 doc->children = content;
11878 }
11879 else {
11880 xmlParseContent(ctxt);
11881 }
Owen Taylor3473f882001-02-23 17:55:21 +000011882 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011883 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011884 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011885 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011886 }
11887 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011888 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011889 }
11890
11891 if (!ctxt->wellFormed) {
11892 if (ctxt->errNo == 0)
11893 ret = 1;
11894 else
11895 ret = ctxt->errNo;
11896 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011897 ret = 0;
11898 }
11899
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011900 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11901 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011902
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011903 /*
11904 * Return the newly created nodeset after unlinking it from
11905 * they pseudo parent.
11906 */
11907 cur = newDoc->children->children;
11908 *lst = cur;
11909 while (cur != NULL) {
11910 xmlSetTreeDoc(cur, doc);
11911 cur->parent = NULL;
11912 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011913 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011914 newDoc->children->children = NULL;
11915 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011916
Owen Taylor3473f882001-02-23 17:55:21 +000011917 if (sax != NULL)
11918 ctxt->sax = oldsax;
11919 xmlFreeParserCtxt(ctxt);
11920 newDoc->intSubset = NULL;
11921 newDoc->extSubset = NULL;
11922 xmlFreeDoc(newDoc);
11923
11924 return(ret);
11925}
11926
11927/**
11928 * xmlSAXParseEntity:
11929 * @sax: the SAX handler block
11930 * @filename: the filename
11931 *
11932 * parse an XML external entity out of context and build a tree.
11933 * It use the given SAX function block to handle the parsing callback.
11934 * If sax is NULL, fallback to the default DOM tree building routines.
11935 *
11936 * [78] extParsedEnt ::= TextDecl? content
11937 *
11938 * This correspond to a "Well Balanced" chunk
11939 *
11940 * Returns the resulting document tree
11941 */
11942
11943xmlDocPtr
11944xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11945 xmlDocPtr ret;
11946 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011947
11948 ctxt = xmlCreateFileParserCtxt(filename);
11949 if (ctxt == NULL) {
11950 return(NULL);
11951 }
11952 if (sax != NULL) {
11953 if (ctxt->sax != NULL)
11954 xmlFree(ctxt->sax);
11955 ctxt->sax = sax;
11956 ctxt->userData = NULL;
11957 }
11958
Owen Taylor3473f882001-02-23 17:55:21 +000011959 xmlParseExtParsedEnt(ctxt);
11960
11961 if (ctxt->wellFormed)
11962 ret = ctxt->myDoc;
11963 else {
11964 ret = NULL;
11965 xmlFreeDoc(ctxt->myDoc);
11966 ctxt->myDoc = NULL;
11967 }
11968 if (sax != NULL)
11969 ctxt->sax = NULL;
11970 xmlFreeParserCtxt(ctxt);
11971
11972 return(ret);
11973}
11974
11975/**
11976 * xmlParseEntity:
11977 * @filename: the filename
11978 *
11979 * parse an XML external entity out of context and build a tree.
11980 *
11981 * [78] extParsedEnt ::= TextDecl? content
11982 *
11983 * This correspond to a "Well Balanced" chunk
11984 *
11985 * Returns the resulting document tree
11986 */
11987
11988xmlDocPtr
11989xmlParseEntity(const char *filename) {
11990 return(xmlSAXParseEntity(NULL, filename));
11991}
Daniel Veillard81273902003-09-30 00:43:48 +000011992#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011993
11994/**
11995 * xmlCreateEntityParserCtxt:
11996 * @URL: the entity URL
11997 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011998 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011999 *
12000 * Create a parser context for an external entity
12001 * Automatic support for ZLIB/Compress compressed document is provided
12002 * by default if found at compile-time.
12003 *
12004 * Returns the new parser context or NULL
12005 */
12006xmlParserCtxtPtr
12007xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12008 const xmlChar *base) {
12009 xmlParserCtxtPtr ctxt;
12010 xmlParserInputPtr inputStream;
12011 char *directory = NULL;
12012 xmlChar *uri;
12013
12014 ctxt = xmlNewParserCtxt();
12015 if (ctxt == NULL) {
12016 return(NULL);
12017 }
12018
12019 uri = xmlBuildURI(URL, base);
12020
12021 if (uri == NULL) {
12022 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12023 if (inputStream == NULL) {
12024 xmlFreeParserCtxt(ctxt);
12025 return(NULL);
12026 }
12027
12028 inputPush(ctxt, inputStream);
12029
12030 if ((ctxt->directory == NULL) && (directory == NULL))
12031 directory = xmlParserGetDirectory((char *)URL);
12032 if ((ctxt->directory == NULL) && (directory != NULL))
12033 ctxt->directory = directory;
12034 } else {
12035 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12036 if (inputStream == NULL) {
12037 xmlFree(uri);
12038 xmlFreeParserCtxt(ctxt);
12039 return(NULL);
12040 }
12041
12042 inputPush(ctxt, inputStream);
12043
12044 if ((ctxt->directory == NULL) && (directory == NULL))
12045 directory = xmlParserGetDirectory((char *)uri);
12046 if ((ctxt->directory == NULL) && (directory != NULL))
12047 ctxt->directory = directory;
12048 xmlFree(uri);
12049 }
Owen Taylor3473f882001-02-23 17:55:21 +000012050 return(ctxt);
12051}
12052
12053/************************************************************************
12054 * *
12055 * Front ends when parsing from a file *
12056 * *
12057 ************************************************************************/
12058
12059/**
Daniel Veillard61b93382003-11-03 14:28:31 +000012060 * xmlCreateURLParserCtxt:
12061 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012062 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000012063 *
Daniel Veillard61b93382003-11-03 14:28:31 +000012064 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000012065 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000012066 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000012067 *
12068 * Returns the new parser context or NULL
12069 */
12070xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000012071xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000012072{
12073 xmlParserCtxtPtr ctxt;
12074 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000012075 char *directory = NULL;
12076
Owen Taylor3473f882001-02-23 17:55:21 +000012077 ctxt = xmlNewParserCtxt();
12078 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012079 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000012080 return(NULL);
12081 }
12082
Daniel Veillarddf292f72005-01-16 19:00:15 +000012083 if (options)
12084 xmlCtxtUseOptions(ctxt, options);
12085 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000012086
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000012087 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012088 if (inputStream == NULL) {
12089 xmlFreeParserCtxt(ctxt);
12090 return(NULL);
12091 }
12092
Owen Taylor3473f882001-02-23 17:55:21 +000012093 inputPush(ctxt, inputStream);
12094 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012095 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012096 if ((ctxt->directory == NULL) && (directory != NULL))
12097 ctxt->directory = directory;
12098
12099 return(ctxt);
12100}
12101
Daniel Veillard61b93382003-11-03 14:28:31 +000012102/**
12103 * xmlCreateFileParserCtxt:
12104 * @filename: the filename
12105 *
12106 * Create a parser context for a file content.
12107 * Automatic support for ZLIB/Compress compressed document is provided
12108 * by default if found at compile-time.
12109 *
12110 * Returns the new parser context or NULL
12111 */
12112xmlParserCtxtPtr
12113xmlCreateFileParserCtxt(const char *filename)
12114{
12115 return(xmlCreateURLParserCtxt(filename, 0));
12116}
12117
Daniel Veillard81273902003-09-30 00:43:48 +000012118#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012119/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012120 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000012121 * @sax: the SAX handler block
12122 * @filename: the filename
12123 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12124 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000012125 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000012126 *
12127 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12128 * compressed document is provided by default if found at compile-time.
12129 * It use the given SAX function block to handle the parsing callback.
12130 * If sax is NULL, fallback to the default DOM tree building routines.
12131 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000012132 * User data (void *) is stored within the parser context in the
12133 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000012134 *
Owen Taylor3473f882001-02-23 17:55:21 +000012135 * Returns the resulting document tree
12136 */
12137
12138xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000012139xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12140 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000012141 xmlDocPtr ret;
12142 xmlParserCtxtPtr ctxt;
12143 char *directory = NULL;
12144
Daniel Veillard635ef722001-10-29 11:48:19 +000012145 xmlInitParser();
12146
Owen Taylor3473f882001-02-23 17:55:21 +000012147 ctxt = xmlCreateFileParserCtxt(filename);
12148 if (ctxt == NULL) {
12149 return(NULL);
12150 }
12151 if (sax != NULL) {
12152 if (ctxt->sax != NULL)
12153 xmlFree(ctxt->sax);
12154 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012155 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012156 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000012157 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000012158 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000012159 }
Owen Taylor3473f882001-02-23 17:55:21 +000012160
12161 if ((ctxt->directory == NULL) && (directory == NULL))
12162 directory = xmlParserGetDirectory(filename);
12163 if ((ctxt->directory == NULL) && (directory != NULL))
12164 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
12165
Daniel Veillarddad3f682002-11-17 16:47:27 +000012166 ctxt->recovery = recovery;
12167
Owen Taylor3473f882001-02-23 17:55:21 +000012168 xmlParseDocument(ctxt);
12169
William M. Brackc07329e2003-09-08 01:57:30 +000012170 if ((ctxt->wellFormed) || recovery) {
12171 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000012172 if (ret != NULL) {
12173 if (ctxt->input->buf->compressed > 0)
12174 ret->compression = 9;
12175 else
12176 ret->compression = ctxt->input->buf->compressed;
12177 }
William M. Brackc07329e2003-09-08 01:57:30 +000012178 }
Owen Taylor3473f882001-02-23 17:55:21 +000012179 else {
12180 ret = NULL;
12181 xmlFreeDoc(ctxt->myDoc);
12182 ctxt->myDoc = NULL;
12183 }
12184 if (sax != NULL)
12185 ctxt->sax = NULL;
12186 xmlFreeParserCtxt(ctxt);
12187
12188 return(ret);
12189}
12190
12191/**
Daniel Veillarda293c322001-10-02 13:54:14 +000012192 * xmlSAXParseFile:
12193 * @sax: the SAX handler block
12194 * @filename: the filename
12195 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12196 * documents
12197 *
12198 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12199 * compressed document is provided by default if found at compile-time.
12200 * It use the given SAX function block to handle the parsing callback.
12201 * If sax is NULL, fallback to the default DOM tree building routines.
12202 *
12203 * Returns the resulting document tree
12204 */
12205
12206xmlDocPtr
12207xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12208 int recovery) {
12209 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12210}
12211
12212/**
Owen Taylor3473f882001-02-23 17:55:21 +000012213 * xmlRecoverDoc:
12214 * @cur: a pointer to an array of xmlChar
12215 *
12216 * parse an XML in-memory document and build a tree.
12217 * In the case the document is not Well Formed, a tree is built anyway
12218 *
12219 * Returns the resulting document tree
12220 */
12221
12222xmlDocPtr
12223xmlRecoverDoc(xmlChar *cur) {
12224 return(xmlSAXParseDoc(NULL, cur, 1));
12225}
12226
12227/**
12228 * xmlParseFile:
12229 * @filename: the filename
12230 *
12231 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12232 * compressed document is provided by default if found at compile-time.
12233 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000012234 * Returns the resulting document tree if the file was wellformed,
12235 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000012236 */
12237
12238xmlDocPtr
12239xmlParseFile(const char *filename) {
12240 return(xmlSAXParseFile(NULL, filename, 0));
12241}
12242
12243/**
12244 * xmlRecoverFile:
12245 * @filename: the filename
12246 *
12247 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12248 * compressed document is provided by default if found at compile-time.
12249 * In the case the document is not Well Formed, a tree is built anyway
12250 *
12251 * Returns the resulting document tree
12252 */
12253
12254xmlDocPtr
12255xmlRecoverFile(const char *filename) {
12256 return(xmlSAXParseFile(NULL, filename, 1));
12257}
12258
12259
12260/**
12261 * xmlSetupParserForBuffer:
12262 * @ctxt: an XML parser context
12263 * @buffer: a xmlChar * buffer
12264 * @filename: a file name
12265 *
12266 * Setup the parser context to parse a new buffer; Clears any prior
12267 * contents from the parser context. The buffer parameter must not be
12268 * NULL, but the filename parameter can be
12269 */
12270void
12271xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12272 const char* filename)
12273{
12274 xmlParserInputPtr input;
12275
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012276 if ((ctxt == NULL) || (buffer == NULL))
12277 return;
12278
Owen Taylor3473f882001-02-23 17:55:21 +000012279 input = xmlNewInputStream(ctxt);
12280 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012281 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012282 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012283 return;
12284 }
12285
12286 xmlClearParserCtxt(ctxt);
12287 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000012288 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000012289 input->base = buffer;
12290 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012291 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000012292 inputPush(ctxt, input);
12293}
12294
12295/**
12296 * xmlSAXUserParseFile:
12297 * @sax: a SAX handler
12298 * @user_data: The user data returned on SAX callbacks
12299 * @filename: a file name
12300 *
12301 * parse an XML file and call the given SAX handler routines.
12302 * Automatic support for ZLIB/Compress compressed document is provided
12303 *
12304 * Returns 0 in case of success or a error number otherwise
12305 */
12306int
12307xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12308 const char *filename) {
12309 int ret = 0;
12310 xmlParserCtxtPtr ctxt;
12311
12312 ctxt = xmlCreateFileParserCtxt(filename);
12313 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000012314#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012315 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012316#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012317 xmlFree(ctxt->sax);
12318 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012319 xmlDetectSAX2(ctxt);
12320
Owen Taylor3473f882001-02-23 17:55:21 +000012321 if (user_data != NULL)
12322 ctxt->userData = user_data;
12323
12324 xmlParseDocument(ctxt);
12325
12326 if (ctxt->wellFormed)
12327 ret = 0;
12328 else {
12329 if (ctxt->errNo != 0)
12330 ret = ctxt->errNo;
12331 else
12332 ret = -1;
12333 }
12334 if (sax != NULL)
12335 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000012336 if (ctxt->myDoc != NULL) {
12337 xmlFreeDoc(ctxt->myDoc);
12338 ctxt->myDoc = NULL;
12339 }
Owen Taylor3473f882001-02-23 17:55:21 +000012340 xmlFreeParserCtxt(ctxt);
12341
12342 return ret;
12343}
Daniel Veillard81273902003-09-30 00:43:48 +000012344#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012345
12346/************************************************************************
12347 * *
12348 * Front ends when parsing from memory *
12349 * *
12350 ************************************************************************/
12351
12352/**
12353 * xmlCreateMemoryParserCtxt:
12354 * @buffer: a pointer to a char array
12355 * @size: the size of the array
12356 *
12357 * Create a parser context for an XML in-memory document.
12358 *
12359 * Returns the new parser context or NULL
12360 */
12361xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012362xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012363 xmlParserCtxtPtr ctxt;
12364 xmlParserInputPtr input;
12365 xmlParserInputBufferPtr buf;
12366
12367 if (buffer == NULL)
12368 return(NULL);
12369 if (size <= 0)
12370 return(NULL);
12371
12372 ctxt = xmlNewParserCtxt();
12373 if (ctxt == NULL)
12374 return(NULL);
12375
Daniel Veillard53350552003-09-18 13:35:51 +000012376 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012377 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012378 if (buf == NULL) {
12379 xmlFreeParserCtxt(ctxt);
12380 return(NULL);
12381 }
Owen Taylor3473f882001-02-23 17:55:21 +000012382
12383 input = xmlNewInputStream(ctxt);
12384 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012385 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012386 xmlFreeParserCtxt(ctxt);
12387 return(NULL);
12388 }
12389
12390 input->filename = NULL;
12391 input->buf = buf;
12392 input->base = input->buf->buffer->content;
12393 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012394 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012395
12396 inputPush(ctxt, input);
12397 return(ctxt);
12398}
12399
Daniel Veillard81273902003-09-30 00:43:48 +000012400#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012401/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012402 * xmlSAXParseMemoryWithData:
12403 * @sax: the SAX handler block
12404 * @buffer: an pointer to a char array
12405 * @size: the size of the array
12406 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12407 * documents
12408 * @data: the userdata
12409 *
12410 * parse an XML in-memory block and use the given SAX function block
12411 * to handle the parsing callback. If sax is NULL, fallback to the default
12412 * DOM tree building routines.
12413 *
12414 * User data (void *) is stored within the parser context in the
12415 * context's _private member, so it is available nearly everywhere in libxml
12416 *
12417 * Returns the resulting document tree
12418 */
12419
12420xmlDocPtr
12421xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12422 int size, int recovery, void *data) {
12423 xmlDocPtr ret;
12424 xmlParserCtxtPtr ctxt;
12425
12426 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12427 if (ctxt == NULL) return(NULL);
12428 if (sax != NULL) {
12429 if (ctxt->sax != NULL)
12430 xmlFree(ctxt->sax);
12431 ctxt->sax = sax;
12432 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012433 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012434 if (data!=NULL) {
12435 ctxt->_private=data;
12436 }
12437
Daniel Veillardadba5f12003-04-04 16:09:01 +000012438 ctxt->recovery = recovery;
12439
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012440 xmlParseDocument(ctxt);
12441
12442 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12443 else {
12444 ret = NULL;
12445 xmlFreeDoc(ctxt->myDoc);
12446 ctxt->myDoc = NULL;
12447 }
12448 if (sax != NULL)
12449 ctxt->sax = NULL;
12450 xmlFreeParserCtxt(ctxt);
12451
12452 return(ret);
12453}
12454
12455/**
Owen Taylor3473f882001-02-23 17:55:21 +000012456 * xmlSAXParseMemory:
12457 * @sax: the SAX handler block
12458 * @buffer: an pointer to a char array
12459 * @size: the size of the array
12460 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12461 * documents
12462 *
12463 * parse an XML in-memory block and use the given SAX function block
12464 * to handle the parsing callback. If sax is NULL, fallback to the default
12465 * DOM tree building routines.
12466 *
12467 * Returns the resulting document tree
12468 */
12469xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012470xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12471 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012472 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012473}
12474
12475/**
12476 * xmlParseMemory:
12477 * @buffer: an pointer to a char array
12478 * @size: the size of the array
12479 *
12480 * parse an XML in-memory block and build a tree.
12481 *
12482 * Returns the resulting document tree
12483 */
12484
Daniel Veillard50822cb2001-07-26 20:05:51 +000012485xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012486 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12487}
12488
12489/**
12490 * xmlRecoverMemory:
12491 * @buffer: an pointer to a char array
12492 * @size: the size of the array
12493 *
12494 * parse an XML in-memory block and build a tree.
12495 * In the case the document is not Well Formed, a tree is built anyway
12496 *
12497 * Returns the resulting document tree
12498 */
12499
Daniel Veillard50822cb2001-07-26 20:05:51 +000012500xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012501 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12502}
12503
12504/**
12505 * xmlSAXUserParseMemory:
12506 * @sax: a SAX handler
12507 * @user_data: The user data returned on SAX callbacks
12508 * @buffer: an in-memory XML document input
12509 * @size: the length of the XML document in bytes
12510 *
12511 * A better SAX parsing routine.
12512 * parse an XML in-memory buffer and call the given SAX handler routines.
12513 *
12514 * Returns 0 in case of success or a error number otherwise
12515 */
12516int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012517 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012518 int ret = 0;
12519 xmlParserCtxtPtr ctxt;
12520 xmlSAXHandlerPtr oldsax = NULL;
12521
Daniel Veillard9e923512002-08-14 08:48:52 +000012522 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012523 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12524 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012525 oldsax = ctxt->sax;
12526 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012527 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012528 if (user_data != NULL)
12529 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012530
12531 xmlParseDocument(ctxt);
12532
12533 if (ctxt->wellFormed)
12534 ret = 0;
12535 else {
12536 if (ctxt->errNo != 0)
12537 ret = ctxt->errNo;
12538 else
12539 ret = -1;
12540 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012541 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012542 if (ctxt->myDoc != NULL) {
12543 xmlFreeDoc(ctxt->myDoc);
12544 ctxt->myDoc = NULL;
12545 }
Owen Taylor3473f882001-02-23 17:55:21 +000012546 xmlFreeParserCtxt(ctxt);
12547
12548 return ret;
12549}
Daniel Veillard81273902003-09-30 00:43:48 +000012550#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012551
12552/**
12553 * xmlCreateDocParserCtxt:
12554 * @cur: a pointer to an array of xmlChar
12555 *
12556 * Creates a parser context for an XML in-memory document.
12557 *
12558 * Returns the new parser context or NULL
12559 */
12560xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012561xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012562 int len;
12563
12564 if (cur == NULL)
12565 return(NULL);
12566 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012567 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012568}
12569
Daniel Veillard81273902003-09-30 00:43:48 +000012570#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012571/**
12572 * xmlSAXParseDoc:
12573 * @sax: the SAX handler block
12574 * @cur: a pointer to an array of xmlChar
12575 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12576 * documents
12577 *
12578 * parse an XML in-memory document and build a tree.
12579 * It use the given SAX function block to handle the parsing callback.
12580 * If sax is NULL, fallback to the default DOM tree building routines.
12581 *
12582 * Returns the resulting document tree
12583 */
12584
12585xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012586xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000012587 xmlDocPtr ret;
12588 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012589 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012590
Daniel Veillard38936062004-11-04 17:45:11 +000012591 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012592
12593
12594 ctxt = xmlCreateDocParserCtxt(cur);
12595 if (ctxt == NULL) return(NULL);
12596 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012597 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012598 ctxt->sax = sax;
12599 ctxt->userData = NULL;
12600 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012601 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012602
12603 xmlParseDocument(ctxt);
12604 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12605 else {
12606 ret = NULL;
12607 xmlFreeDoc(ctxt->myDoc);
12608 ctxt->myDoc = NULL;
12609 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012610 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012611 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012612 xmlFreeParserCtxt(ctxt);
12613
12614 return(ret);
12615}
12616
12617/**
12618 * xmlParseDoc:
12619 * @cur: a pointer to an array of xmlChar
12620 *
12621 * parse an XML in-memory document and build a tree.
12622 *
12623 * Returns the resulting document tree
12624 */
12625
12626xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000012627xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012628 return(xmlSAXParseDoc(NULL, cur, 0));
12629}
Daniel Veillard81273902003-09-30 00:43:48 +000012630#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012631
Daniel Veillard81273902003-09-30 00:43:48 +000012632#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012633/************************************************************************
12634 * *
12635 * Specific function to keep track of entities references *
12636 * and used by the XSLT debugger *
12637 * *
12638 ************************************************************************/
12639
12640static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12641
12642/**
12643 * xmlAddEntityReference:
12644 * @ent : A valid entity
12645 * @firstNode : A valid first node for children of entity
12646 * @lastNode : A valid last node of children entity
12647 *
12648 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12649 */
12650static void
12651xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12652 xmlNodePtr lastNode)
12653{
12654 if (xmlEntityRefFunc != NULL) {
12655 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12656 }
12657}
12658
12659
12660/**
12661 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012662 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012663 *
12664 * Set the function to call call back when a xml reference has been made
12665 */
12666void
12667xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12668{
12669 xmlEntityRefFunc = func;
12670}
Daniel Veillard81273902003-09-30 00:43:48 +000012671#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012672
12673/************************************************************************
12674 * *
12675 * Miscellaneous *
12676 * *
12677 ************************************************************************/
12678
12679#ifdef LIBXML_XPATH_ENABLED
12680#include <libxml/xpath.h>
12681#endif
12682
Daniel Veillardffa3c742005-07-21 13:24:09 +000012683extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012684static int xmlParserInitialized = 0;
12685
12686/**
12687 * xmlInitParser:
12688 *
12689 * Initialization function for the XML parser.
12690 * This is not reentrant. Call once before processing in case of
12691 * use in multithreaded programs.
12692 */
12693
12694void
12695xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012696 if (xmlParserInitialized != 0)
12697 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012698
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012699 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12700 (xmlGenericError == NULL))
12701 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012702 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012703 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012704 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012705 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012706 xmlDefaultSAXHandlerInit();
12707 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012708#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012709 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012710#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012711#ifdef LIBXML_HTML_ENABLED
12712 htmlInitAutoClose();
12713 htmlDefaultSAXHandlerInit();
12714#endif
12715#ifdef LIBXML_XPATH_ENABLED
12716 xmlXPathInit();
12717#endif
12718 xmlParserInitialized = 1;
12719}
12720
12721/**
12722 * xmlCleanupParser:
12723 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012724 * Cleanup function for the XML library. It tries to reclaim all
12725 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012726 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012727 * function should not prevent reusing the library but one should
12728 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012729 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012730 */
12731
12732void
12733xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012734 if (!xmlParserInitialized)
12735 return;
12736
Owen Taylor3473f882001-02-23 17:55:21 +000012737 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012738#ifdef LIBXML_CATALOG_ENABLED
12739 xmlCatalogCleanup();
12740#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012741 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012742 xmlCleanupInputCallbacks();
12743#ifdef LIBXML_OUTPUT_ENABLED
12744 xmlCleanupOutputCallbacks();
12745#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012746#ifdef LIBXML_SCHEMAS_ENABLED
12747 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012748 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012749#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012750 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012751 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012752 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012753 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012754 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012755}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012756
12757/************************************************************************
12758 * *
12759 * New set (2.6.0) of simpler and more flexible APIs *
12760 * *
12761 ************************************************************************/
12762
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is never dictionary-owned.
 *
 * The expansion is a single statement that requires its own trailing
 * semicolon, so the macro can safely be used as the body of an
 * if/else branch.
 */
#define DICT_FREE(str)							\
    do {								\
        if ((str) && ((!dict) ||					\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))		\
            xmlFree((char *)(str));					\
    } while (0)
12774
12775/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012776 * xmlCtxtReset:
12777 * @ctxt: an XML parser context
12778 *
12779 * Reset a parser context
12780 */
12781void
12782xmlCtxtReset(xmlParserCtxtPtr ctxt)
12783{
12784 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012785 xmlDictPtr dict;
12786
12787 if (ctxt == NULL)
12788 return;
12789
12790 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012791
12792 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12793 xmlFreeInputStream(input);
12794 }
12795 ctxt->inputNr = 0;
12796 ctxt->input = NULL;
12797
12798 ctxt->spaceNr = 0;
12799 ctxt->spaceTab[0] = -1;
12800 ctxt->space = &ctxt->spaceTab[0];
12801
12802
12803 ctxt->nodeNr = 0;
12804 ctxt->node = NULL;
12805
12806 ctxt->nameNr = 0;
12807 ctxt->name = NULL;
12808
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012809 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012810 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012811 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012812 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012813 DICT_FREE(ctxt->directory);
12814 ctxt->directory = NULL;
12815 DICT_FREE(ctxt->extSubURI);
12816 ctxt->extSubURI = NULL;
12817 DICT_FREE(ctxt->extSubSystem);
12818 ctxt->extSubSystem = NULL;
12819 if (ctxt->myDoc != NULL)
12820 xmlFreeDoc(ctxt->myDoc);
12821 ctxt->myDoc = NULL;
12822
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012823 ctxt->standalone = -1;
12824 ctxt->hasExternalSubset = 0;
12825 ctxt->hasPErefs = 0;
12826 ctxt->html = 0;
12827 ctxt->external = 0;
12828 ctxt->instate = XML_PARSER_START;
12829 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012830
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012831 ctxt->wellFormed = 1;
12832 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012833 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012834 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012835#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012836 ctxt->vctxt.userData = ctxt;
12837 ctxt->vctxt.error = xmlParserValidityError;
12838 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012839#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012840 ctxt->record_info = 0;
12841 ctxt->nbChars = 0;
12842 ctxt->checkIndex = 0;
12843 ctxt->inSubset = 0;
12844 ctxt->errNo = XML_ERR_OK;
12845 ctxt->depth = 0;
12846 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12847 ctxt->catalogs = NULL;
12848 xmlInitNodeInfoSeq(&ctxt->node_seq);
12849
12850 if (ctxt->attsDefault != NULL) {
12851 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12852 ctxt->attsDefault = NULL;
12853 }
12854 if (ctxt->attsSpecial != NULL) {
12855 xmlHashFree(ctxt->attsSpecial, NULL);
12856 ctxt->attsSpecial = NULL;
12857 }
12858
Daniel Veillard4432df22003-09-28 18:58:27 +000012859#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012860 if (ctxt->catalogs != NULL)
12861 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012862#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012863 if (ctxt->lastError.code != XML_ERR_OK)
12864 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012865}
12866
12867/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012868 * xmlCtxtResetPush:
12869 * @ctxt: an XML parser context
12870 * @chunk: a pointer to an array of chars
12871 * @size: number of chars in the array
12872 * @filename: an optional file name or URI
12873 * @encoding: the document encoding, or NULL
12874 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012875 * Reset a push parser context
12876 *
12877 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012878 */
12879int
12880xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12881 int size, const char *filename, const char *encoding)
12882{
12883 xmlParserInputPtr inputStream;
12884 xmlParserInputBufferPtr buf;
12885 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12886
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012887 if (ctxt == NULL)
12888 return(1);
12889
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012890 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12891 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12892
12893 buf = xmlAllocParserInputBuffer(enc);
12894 if (buf == NULL)
12895 return(1);
12896
12897 if (ctxt == NULL) {
12898 xmlFreeParserInputBuffer(buf);
12899 return(1);
12900 }
12901
12902 xmlCtxtReset(ctxt);
12903
12904 if (ctxt->pushTab == NULL) {
12905 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12906 sizeof(xmlChar *));
12907 if (ctxt->pushTab == NULL) {
12908 xmlErrMemory(ctxt, NULL);
12909 xmlFreeParserInputBuffer(buf);
12910 return(1);
12911 }
12912 }
12913
12914 if (filename == NULL) {
12915 ctxt->directory = NULL;
12916 } else {
12917 ctxt->directory = xmlParserGetDirectory(filename);
12918 }
12919
12920 inputStream = xmlNewInputStream(ctxt);
12921 if (inputStream == NULL) {
12922 xmlFreeParserInputBuffer(buf);
12923 return(1);
12924 }
12925
12926 if (filename == NULL)
12927 inputStream->filename = NULL;
12928 else
12929 inputStream->filename = (char *)
12930 xmlCanonicPath((const xmlChar *) filename);
12931 inputStream->buf = buf;
12932 inputStream->base = inputStream->buf->buffer->content;
12933 inputStream->cur = inputStream->buf->buffer->content;
12934 inputStream->end =
12935 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12936
12937 inputPush(ctxt, inputStream);
12938
12939 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12940 (ctxt->input->buf != NULL)) {
12941 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12942 int cur = ctxt->input->cur - ctxt->input->base;
12943
12944 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12945
12946 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12947 ctxt->input->cur = ctxt->input->base + cur;
12948 ctxt->input->end =
12949 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12950 use];
12951#ifdef DEBUG_PUSH
12952 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12953#endif
12954 }
12955
12956 if (encoding != NULL) {
12957 xmlCharEncodingHandlerPtr hdlr;
12958
12959 hdlr = xmlFindCharEncodingHandler(encoding);
12960 if (hdlr != NULL) {
12961 xmlSwitchToEncoding(ctxt, hdlr);
12962 } else {
12963 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12964 "Unsupported encoding %s\n", BAD_CAST encoding);
12965 }
12966 } else if (enc != XML_CHAR_ENCODING_NONE) {
12967 xmlSwitchEncoding(ctxt, enc);
12968 }
12969
12970 return(0);
12971}
12972
12973/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012974 * xmlCtxtUseOptions:
12975 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012976 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012977 *
12978 * Applies the options to the parser context
12979 *
12980 * Returns 0 in case of success, the set of unknown or unimplemented options
12981 * in case of error.
12982 */
12983int
12984xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12985{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012986 if (ctxt == NULL)
12987 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012988 if (options & XML_PARSE_RECOVER) {
12989 ctxt->recovery = 1;
12990 options -= XML_PARSE_RECOVER;
12991 } else
12992 ctxt->recovery = 0;
12993 if (options & XML_PARSE_DTDLOAD) {
12994 ctxt->loadsubset = XML_DETECT_IDS;
12995 options -= XML_PARSE_DTDLOAD;
12996 } else
12997 ctxt->loadsubset = 0;
12998 if (options & XML_PARSE_DTDATTR) {
12999 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13000 options -= XML_PARSE_DTDATTR;
13001 }
13002 if (options & XML_PARSE_NOENT) {
13003 ctxt->replaceEntities = 1;
13004 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13005 options -= XML_PARSE_NOENT;
13006 } else
13007 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013008 if (options & XML_PARSE_PEDANTIC) {
13009 ctxt->pedantic = 1;
13010 options -= XML_PARSE_PEDANTIC;
13011 } else
13012 ctxt->pedantic = 0;
13013 if (options & XML_PARSE_NOBLANKS) {
13014 ctxt->keepBlanks = 0;
13015 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13016 options -= XML_PARSE_NOBLANKS;
13017 } else
13018 ctxt->keepBlanks = 1;
13019 if (options & XML_PARSE_DTDVALID) {
13020 ctxt->validate = 1;
13021 if (options & XML_PARSE_NOWARNING)
13022 ctxt->vctxt.warning = NULL;
13023 if (options & XML_PARSE_NOERROR)
13024 ctxt->vctxt.error = NULL;
13025 options -= XML_PARSE_DTDVALID;
13026 } else
13027 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000013028 if (options & XML_PARSE_NOWARNING) {
13029 ctxt->sax->warning = NULL;
13030 options -= XML_PARSE_NOWARNING;
13031 }
13032 if (options & XML_PARSE_NOERROR) {
13033 ctxt->sax->error = NULL;
13034 ctxt->sax->fatalError = NULL;
13035 options -= XML_PARSE_NOERROR;
13036 }
Daniel Veillard81273902003-09-30 00:43:48 +000013037#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013038 if (options & XML_PARSE_SAX1) {
13039 ctxt->sax->startElement = xmlSAX2StartElement;
13040 ctxt->sax->endElement = xmlSAX2EndElement;
13041 ctxt->sax->startElementNs = NULL;
13042 ctxt->sax->endElementNs = NULL;
13043 ctxt->sax->initialized = 1;
13044 options -= XML_PARSE_SAX1;
13045 }
Daniel Veillard81273902003-09-30 00:43:48 +000013046#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013047 if (options & XML_PARSE_NODICT) {
13048 ctxt->dictNames = 0;
13049 options -= XML_PARSE_NODICT;
13050 } else {
13051 ctxt->dictNames = 1;
13052 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000013053 if (options & XML_PARSE_NOCDATA) {
13054 ctxt->sax->cdataBlock = NULL;
13055 options -= XML_PARSE_NOCDATA;
13056 }
13057 if (options & XML_PARSE_NSCLEAN) {
13058 ctxt->options |= XML_PARSE_NSCLEAN;
13059 options -= XML_PARSE_NSCLEAN;
13060 }
Daniel Veillard61b93382003-11-03 14:28:31 +000013061 if (options & XML_PARSE_NONET) {
13062 ctxt->options |= XML_PARSE_NONET;
13063 options -= XML_PARSE_NONET;
13064 }
Daniel Veillard8874b942005-08-25 13:19:21 +000013065 if (options & XML_PARSE_COMPACT) {
13066 ctxt->options |= XML_PARSE_COMPACT;
13067 options -= XML_PARSE_COMPACT;
13068 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000013069 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013070 return (options);
13071}
13072
13073/**
13074 * xmlDoRead:
13075 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000013076 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013077 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013078 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013079 * @reuse: keep the context for reuse
13080 *
13081 * Common front-end for the xmlRead functions
13082 *
13083 * Returns the resulting document tree or NULL
13084 */
13085static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013086xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
13087 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013088{
13089 xmlDocPtr ret;
13090
13091 xmlCtxtUseOptions(ctxt, options);
13092 if (encoding != NULL) {
13093 xmlCharEncodingHandlerPtr hdlr;
13094
13095 hdlr = xmlFindCharEncodingHandler(encoding);
13096 if (hdlr != NULL)
13097 xmlSwitchToEncoding(ctxt, hdlr);
13098 }
Daniel Veillard60942de2003-09-25 21:05:58 +000013099 if ((URL != NULL) && (ctxt->input != NULL) &&
13100 (ctxt->input->filename == NULL))
13101 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013102 xmlParseDocument(ctxt);
13103 if ((ctxt->wellFormed) || ctxt->recovery)
13104 ret = ctxt->myDoc;
13105 else {
13106 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013107 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013108 xmlFreeDoc(ctxt->myDoc);
13109 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013110 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013111 ctxt->myDoc = NULL;
13112 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013113 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013114 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013115
13116 return (ret);
13117}
13118
13119/**
13120 * xmlReadDoc:
13121 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013122 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013123 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013124 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013125 *
13126 * parse an XML in-memory document and build a tree.
13127 *
13128 * Returns the resulting document tree
13129 */
13130xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013131xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013132{
13133 xmlParserCtxtPtr ctxt;
13134
13135 if (cur == NULL)
13136 return (NULL);
13137
13138 ctxt = xmlCreateDocParserCtxt(cur);
13139 if (ctxt == NULL)
13140 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013141 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013142}
13143
13144/**
13145 * xmlReadFile:
13146 * @filename: a file or URL
13147 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013148 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013149 *
13150 * parse an XML file from the filesystem or the network.
13151 *
13152 * Returns the resulting document tree
13153 */
13154xmlDocPtr
13155xmlReadFile(const char *filename, const char *encoding, int options)
13156{
13157 xmlParserCtxtPtr ctxt;
13158
Daniel Veillard61b93382003-11-03 14:28:31 +000013159 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013160 if (ctxt == NULL)
13161 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013162 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013163}
13164
13165/**
13166 * xmlReadMemory:
13167 * @buffer: a pointer to a char array
13168 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013169 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013170 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013171 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013172 *
13173 * parse an XML in-memory document and build a tree.
13174 *
13175 * Returns the resulting document tree
13176 */
13177xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013178xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013179{
13180 xmlParserCtxtPtr ctxt;
13181
13182 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13183 if (ctxt == NULL)
13184 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000013185 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013186}
13187
13188/**
13189 * xmlReadFd:
13190 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013191 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013192 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013193 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013194 *
13195 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013196 * NOTE that the file descriptor will not be closed when the
13197 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013198 *
13199 * Returns the resulting document tree
13200 */
13201xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013202xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013203{
13204 xmlParserCtxtPtr ctxt;
13205 xmlParserInputBufferPtr input;
13206 xmlParserInputPtr stream;
13207
13208 if (fd < 0)
13209 return (NULL);
13210
13211 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13212 if (input == NULL)
13213 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013214 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013215 ctxt = xmlNewParserCtxt();
13216 if (ctxt == NULL) {
13217 xmlFreeParserInputBuffer(input);
13218 return (NULL);
13219 }
13220 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13221 if (stream == NULL) {
13222 xmlFreeParserInputBuffer(input);
13223 xmlFreeParserCtxt(ctxt);
13224 return (NULL);
13225 }
13226 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013227 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013228}
13229
13230/**
13231 * xmlReadIO:
13232 * @ioread: an I/O read function
13233 * @ioclose: an I/O close function
13234 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013235 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013236 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013237 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013238 *
13239 * parse an XML document from I/O functions and source and build a tree.
13240 *
13241 * Returns the resulting document tree
13242 */
13243xmlDocPtr
13244xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000013245 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013246{
13247 xmlParserCtxtPtr ctxt;
13248 xmlParserInputBufferPtr input;
13249 xmlParserInputPtr stream;
13250
13251 if (ioread == NULL)
13252 return (NULL);
13253
13254 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13255 XML_CHAR_ENCODING_NONE);
13256 if (input == NULL)
13257 return (NULL);
13258 ctxt = xmlNewParserCtxt();
13259 if (ctxt == NULL) {
13260 xmlFreeParserInputBuffer(input);
13261 return (NULL);
13262 }
13263 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13264 if (stream == NULL) {
13265 xmlFreeParserInputBuffer(input);
13266 xmlFreeParserCtxt(ctxt);
13267 return (NULL);
13268 }
13269 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013270 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013271}
13272
13273/**
13274 * xmlCtxtReadDoc:
13275 * @ctxt: an XML parser context
13276 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000013277 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013278 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013279 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013280 *
13281 * parse an XML in-memory document and build a tree.
13282 * This reuses the existing @ctxt parser context
13283 *
13284 * Returns the resulting document tree
13285 */
13286xmlDocPtr
13287xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000013288 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013289{
13290 xmlParserInputPtr stream;
13291
13292 if (cur == NULL)
13293 return (NULL);
13294 if (ctxt == NULL)
13295 return (NULL);
13296
13297 xmlCtxtReset(ctxt);
13298
13299 stream = xmlNewStringInputStream(ctxt, cur);
13300 if (stream == NULL) {
13301 return (NULL);
13302 }
13303 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013304 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013305}
13306
13307/**
13308 * xmlCtxtReadFile:
13309 * @ctxt: an XML parser context
13310 * @filename: a file or URL
13311 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013312 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013313 *
13314 * parse an XML file from the filesystem or the network.
13315 * This reuses the existing @ctxt parser context
13316 *
13317 * Returns the resulting document tree
13318 */
13319xmlDocPtr
13320xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13321 const char *encoding, int options)
13322{
13323 xmlParserInputPtr stream;
13324
13325 if (filename == NULL)
13326 return (NULL);
13327 if (ctxt == NULL)
13328 return (NULL);
13329
13330 xmlCtxtReset(ctxt);
13331
Daniel Veillard29614c72004-11-26 10:47:26 +000013332 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013333 if (stream == NULL) {
13334 return (NULL);
13335 }
13336 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013337 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013338}
13339
13340/**
13341 * xmlCtxtReadMemory:
13342 * @ctxt: an XML parser context
13343 * @buffer: a pointer to a char array
13344 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000013345 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013346 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013347 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013348 *
13349 * parse an XML in-memory document and build a tree.
13350 * This reuses the existing @ctxt parser context
13351 *
13352 * Returns the resulting document tree
13353 */
13354xmlDocPtr
13355xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000013356 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013357{
13358 xmlParserInputBufferPtr input;
13359 xmlParserInputPtr stream;
13360
13361 if (ctxt == NULL)
13362 return (NULL);
13363 if (buffer == NULL)
13364 return (NULL);
13365
13366 xmlCtxtReset(ctxt);
13367
13368 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13369 if (input == NULL) {
13370 return(NULL);
13371 }
13372
13373 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13374 if (stream == NULL) {
13375 xmlFreeParserInputBuffer(input);
13376 return(NULL);
13377 }
13378
13379 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013380 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013381}
13382
13383/**
13384 * xmlCtxtReadFd:
13385 * @ctxt: an XML parser context
13386 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013387 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013388 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013389 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013390 *
13391 * parse an XML from a file descriptor and build a tree.
13392 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013393 * NOTE that the file descriptor will not be closed when the
13394 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013395 *
13396 * Returns the resulting document tree
13397 */
13398xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013399xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13400 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013401{
13402 xmlParserInputBufferPtr input;
13403 xmlParserInputPtr stream;
13404
13405 if (fd < 0)
13406 return (NULL);
13407 if (ctxt == NULL)
13408 return (NULL);
13409
13410 xmlCtxtReset(ctxt);
13411
13412
13413 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13414 if (input == NULL)
13415 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013416 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013417 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13418 if (stream == NULL) {
13419 xmlFreeParserInputBuffer(input);
13420 return (NULL);
13421 }
13422 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013423 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013424}
13425
13426/**
13427 * xmlCtxtReadIO:
13428 * @ctxt: an XML parser context
13429 * @ioread: an I/O read function
13430 * @ioclose: an I/O close function
13431 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013432 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013433 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013434 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013435 *
13436 * parse an XML document from I/O functions and source and build a tree.
13437 * This reuses the existing @ctxt parser context
13438 *
13439 * Returns the resulting document tree
13440 */
13441xmlDocPtr
13442xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13443 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013444 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013445 const char *encoding, int options)
13446{
13447 xmlParserInputBufferPtr input;
13448 xmlParserInputPtr stream;
13449
13450 if (ioread == NULL)
13451 return (NULL);
13452 if (ctxt == NULL)
13453 return (NULL);
13454
13455 xmlCtxtReset(ctxt);
13456
13457 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13458 XML_CHAR_ENCODING_NONE);
13459 if (input == NULL)
13460 return (NULL);
13461 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13462 if (stream == NULL) {
13463 xmlFreeParserInputBuffer(input);
13464 return (NULL);
13465 }
13466 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013467 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013468}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000013469
13470#define bottom_parser
13471#include "elfgcchack.h"