blob: 6ee9159bd784e3deed2b4bd7e2a199e438567086 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary ceiling on the depth of the XML documents that we accept to
 * process. It is a safety boundary, not a limitation of the parser
 * itself. Defaults to 1024.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/* Compile-time switch for the SAX2 code paths (always on here). */
#define SAX2 1

/*
 * Buffer sizes used by the parser; presumably the initial sizes of the
 * temporary buffers of the parsing routines - confirm against the users.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * NULL terminated list of the "xml" prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000413 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000414 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000415 schannel = ctxt->sax->serror;
416 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000417 (ctxt->sax) ? ctxt->sax->warning : NULL,
418 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000419 ctxt, NULL, XML_FROM_PARSER, error,
420 XML_ERR_WARNING, NULL, 0,
421 (const char *) str1, (const char *) str2, NULL, 0, 0,
422 msg, (const char *) str1, (const char *) str2);
423}
424
425/**
426 * xmlValidityError:
427 * @ctxt: an XML parser context
428 * @error: the error number
429 * @msg: the error message
430 * @str1: extra data
431 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000432 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000433 */
434static void
435xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
436 const char *msg, const xmlChar *str1)
437{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000438 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000439
440 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
441 (ctxt->instate == XML_PARSER_EOF))
442 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000443 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000444 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000445 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000446 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000447 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000448 ctxt, NULL, XML_FROM_DTD, error,
449 XML_ERR_ERROR, NULL, 0, (const char *) str1,
450 NULL, NULL, 0, 0,
451 msg, (const char *) str1);
452 ctxt->valid = 0;
453}
454
455/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 * xmlFatalErrMsgInt:
457 * @ctxt: an XML parser context
458 * @error: the error number
459 * @msg: the error message
460 * @val: an integer value
461 *
462 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
463 */
464static void
465xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000467{
Daniel Veillard157fee02003-10-31 10:36:03 +0000468 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
469 (ctxt->instate == XML_PARSER_EOF))
470 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000471 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000472 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
474 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000475 ctxt->wellFormed = 0;
476 if (ctxt->recovery == 0)
477 ctxt->disableSAX = 1;
478}
479
480/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000481 * xmlFatalErrMsgStrIntStr:
482 * @ctxt: an XML parser context
483 * @error: the error number
484 * @msg: the error message
485 * @str1: an string info
486 * @val: an integer value
487 * @str2: an string info
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491static void
492xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg, const xmlChar *str1, int val,
494 const xmlChar *str2)
495{
Daniel Veillard157fee02003-10-31 10:36:03 +0000496 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
497 (ctxt->instate == XML_PARSER_EOF))
498 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000499 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000500 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000501 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
502 NULL, 0, (const char *) str1, (const char *) str2,
503 NULL, val, 0, msg, str1, val, str2);
504 ctxt->wellFormed = 0;
505 if (ctxt->recovery == 0)
506 ctxt->disableSAX = 1;
507}
508
509/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000510 * xmlFatalErrMsgStr:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @val: a string value
515 *
516 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
517 */
518static void
519xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000520 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000521{
Daniel Veillard157fee02003-10-31 10:36:03 +0000522 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
523 (ctxt->instate == XML_PARSER_EOF))
524 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000525 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000526 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 XML_FROM_PARSER, error, XML_ERR_FATAL,
528 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
529 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000530 ctxt->wellFormed = 0;
531 if (ctxt->recovery == 0)
532 ctxt->disableSAX = 1;
533}
534
535/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000536 * xmlErrMsgStr:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @val: a string value
541 *
542 * Handle a non fatal parser error
543 */
544static void
545xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
546 const char *msg, const xmlChar * val)
547{
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000551 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000553 XML_FROM_PARSER, error, XML_ERR_ERROR,
554 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
555 val);
556}
557
558/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000559 * xmlNsErr:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the message
563 * @info1: extra information string
564 * @info2: extra information string
565 *
566 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
567 */
568static void
569xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
570 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000571 const xmlChar * info1, const xmlChar * info2,
572 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000573{
Daniel Veillard157fee02003-10-31 10:36:03 +0000574 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
575 (ctxt->instate == XML_PARSER_EOF))
576 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000579 XML_ERR_ERROR, NULL, 0, (const char *) info1,
580 (const char *) info2, (const char *) info3, 0, 0, msg,
581 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 ctxt->nsWellFormed = 0;
583}
584
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000585/************************************************************************
586 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000587 * SAX2 defaulted attributes handling *
588 * *
589 ************************************************************************/
590
591/**
592 * xmlDetectSAX2:
593 * @ctxt: an XML parser context
594 *
595 * Do the SAX2 detection and specific intialization
596 */
597static void
598xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
599 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000600#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000601 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
602 ((ctxt->sax->startElementNs != NULL) ||
603 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000604#else
605 ctxt->sax2 = 1;
606#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000607
608 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
609 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
610 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000611 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
612 (ctxt->str_xml_ns == NULL)) {
613 xmlErrMemory(ctxt, NULL);
614 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000615}
616
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617typedef struct _xmlDefAttrs xmlDefAttrs;
618typedef xmlDefAttrs *xmlDefAttrsPtr;
619struct _xmlDefAttrs {
620 int nbAttrs; /* number of defaulted attributes on that element */
621 int maxAttrs; /* the size of the array */
622 const xmlChar *values[4]; /* array of localname/prefix/values */
623};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000624
625/**
626 * xmlAddDefAttrs:
627 * @ctxt: an XML parser context
628 * @fullname: the element fullname
629 * @fullattr: the attribute fullname
630 * @value: the attribute value
631 *
632 * Add a defaulted attribute for an element
633 */
634static void
635xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
636 const xmlChar *fullname,
637 const xmlChar *fullattr,
638 const xmlChar *value) {
639 xmlDefAttrsPtr defaults;
640 int len;
641 const xmlChar *name;
642 const xmlChar *prefix;
643
644 if (ctxt->attsDefault == NULL) {
645 ctxt->attsDefault = xmlHashCreate(10);
646 if (ctxt->attsDefault == NULL)
647 goto mem_error;
648 }
649
650 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000651 * split the element name into prefix:localname , the string found
652 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000653 */
654 name = xmlSplitQName3(fullname, &len);
655 if (name == NULL) {
656 name = xmlDictLookup(ctxt->dict, fullname, -1);
657 prefix = NULL;
658 } else {
659 name = xmlDictLookup(ctxt->dict, name, -1);
660 prefix = xmlDictLookup(ctxt->dict, fullname, len);
661 }
662
663 /*
664 * make sure there is some storage
665 */
666 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
667 if (defaults == NULL) {
668 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000669 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000670 if (defaults == NULL)
671 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000672 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000676 xmlDefAttrsPtr temp;
677
678 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000679 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000680 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000681 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000682 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000683 defaults->maxAttrs *= 2;
684 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
685 }
686
687 /*
688 * plit the element name into prefix:localname , the string found
689 * are within the DTD and hen not associated to namespace names.
690 */
691 name = xmlSplitQName3(fullattr, &len);
692 if (name == NULL) {
693 name = xmlDictLookup(ctxt->dict, fullattr, -1);
694 prefix = NULL;
695 } else {
696 name = xmlDictLookup(ctxt->dict, name, -1);
697 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
698 }
699
700 defaults->values[4 * defaults->nbAttrs] = name;
701 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
702 /* intern the string and precompute the end */
703 len = xmlStrlen(value);
704 value = xmlDictLookup(ctxt->dict, value, len);
705 defaults->values[4 * defaults->nbAttrs + 2] = value;
706 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
707 defaults->nbAttrs++;
708
709 return;
710
711mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000712 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000713 return;
714}
715
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000716/**
717 * xmlAddSpecialAttr:
718 * @ctxt: an XML parser context
719 * @fullname: the element fullname
720 * @fullattr: the attribute fullname
721 * @type: the attribute type
722 *
723 * Register that this attribute is not CDATA
724 */
725static void
726xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
727 const xmlChar *fullname,
728 const xmlChar *fullattr,
729 int type)
730{
731 if (ctxt->attsSpecial == NULL) {
732 ctxt->attsSpecial = xmlHashCreate(10);
733 if (ctxt->attsSpecial == NULL)
734 goto mem_error;
735 }
736
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000737 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
738 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000739 return;
740
741mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000742 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000743 return;
744}
745
Daniel Veillard4432df22003-09-28 18:58:27 +0000746/**
747 * xmlCheckLanguageID:
748 * @lang: pointer to the string value
749 *
750 * Checks that the value conforms to the LanguageID production:
751 *
752 * NOTE: this is somewhat deprecated, those productions were removed from
753 * the XML Second edition.
754 *
755 * [33] LanguageID ::= Langcode ('-' Subcode)*
756 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
757 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
758 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
759 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
760 * [38] Subcode ::= ([a-z] | [A-Z])+
761 *
762 * Returns 1 if correct 0 otherwise
763 **/
764int
765xmlCheckLanguageID(const xmlChar * lang)
766{
767 const xmlChar *cur = lang;
768
769 if (cur == NULL)
770 return (0);
771 if (((cur[0] == 'i') && (cur[1] == '-')) ||
772 ((cur[0] == 'I') && (cur[1] == '-'))) {
773 /*
774 * IANA code
775 */
776 cur += 2;
777 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
778 ((cur[0] >= 'a') && (cur[0] <= 'z')))
779 cur++;
780 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
781 ((cur[0] == 'X') && (cur[1] == '-'))) {
782 /*
783 * User code
784 */
785 cur += 2;
786 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
787 ((cur[0] >= 'a') && (cur[0] <= 'z')))
788 cur++;
789 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
790 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
791 /*
792 * ISO639
793 */
794 cur++;
795 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
796 ((cur[0] >= 'a') && (cur[0] <= 'z')))
797 cur++;
798 else
799 return (0);
800 } else
801 return (0);
802 while (cur[0] != 0) { /* non input consuming */
803 if (cur[0] != '-')
804 return (0);
805 cur++;
806 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
807 ((cur[0] >= 'a') && (cur[0] <= 'z')))
808 cur++;
809 else
810 return (0);
811 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
812 ((cur[0] >= 'a') && (cur[0] <= 'z')))
813 cur++;
814 }
815 return (1);
816}
817
Owen Taylor3473f882001-02-23 17:55:21 +0000818/************************************************************************
819 * *
820 * Parser stacks related functions and macros *
821 * *
822 ************************************************************************/
823
824xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
825 const xmlChar ** str);
826
Daniel Veillard0fb18932003-09-07 09:14:37 +0000827#ifdef SAX2
828/**
829 * nsPush:
830 * @ctxt: an XML parser context
831 * @prefix: the namespace prefix or NULL
832 * @URL: the namespace name
833 *
834 * Pushes a new parser namespace on top of the ns stack
835 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000836 * Returns -1 in case of error, -2 if the namespace should be discarded
837 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000838 */
839static int
840nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
841{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000842 if (ctxt->options & XML_PARSE_NSCLEAN) {
843 int i;
844 for (i = 0;i < ctxt->nsNr;i += 2) {
845 if (ctxt->nsTab[i] == prefix) {
846 /* in scope */
847 if (ctxt->nsTab[i + 1] == URL)
848 return(-2);
849 /* out of scope keep it */
850 break;
851 }
852 }
853 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000854 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
855 ctxt->nsMax = 10;
856 ctxt->nsNr = 0;
857 ctxt->nsTab = (const xmlChar **)
858 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax = 0;
862 return (-1);
863 }
864 } else if (ctxt->nsNr >= ctxt->nsMax) {
865 ctxt->nsMax *= 2;
866 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000867 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
869 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000870 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000871 ctxt->nsMax /= 2;
872 return (-1);
873 }
874 }
875 ctxt->nsTab[ctxt->nsNr++] = prefix;
876 ctxt->nsTab[ctxt->nsNr++] = URL;
877 return (ctxt->nsNr);
878}
879/**
880 * nsPop:
881 * @ctxt: an XML parser context
882 * @nr: the number to pop
883 *
884 * Pops the top @nr parser prefix/namespace from the ns stack
885 *
886 * Returns the number of namespaces removed
887 */
888static int
889nsPop(xmlParserCtxtPtr ctxt, int nr)
890{
891 int i;
892
893 if (ctxt->nsTab == NULL) return(0);
894 if (ctxt->nsNr < nr) {
895 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
896 nr = ctxt->nsNr;
897 }
898 if (ctxt->nsNr <= 0)
899 return (0);
900
901 for (i = 0;i < nr;i++) {
902 ctxt->nsNr--;
903 ctxt->nsTab[ctxt->nsNr] = NULL;
904 }
905 return(nr);
906}
907#endif
908
909static int
910xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
911 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000912 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000913 int maxatts;
914
915 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **)
918 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
922 if (attallocs == NULL) goto mem_error;
923 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000925 } else if (nr + 5 > ctxt->maxatts) {
926 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
928 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000929 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000930 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000931 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
932 (maxatts / 5) * sizeof(int));
933 if (attallocs == NULL) goto mem_error;
934 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000935 ctxt->maxatts = maxatts;
936 }
937 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000938mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000939 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000940 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000941}
942
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000943/**
944 * inputPush:
945 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000947 *
948 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000949 *
950 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000951 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000952int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
954{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000955 if ((ctxt == NULL) || (value == NULL))
956 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000957 if (ctxt->inputNr >= ctxt->inputMax) {
958 ctxt->inputMax *= 2;
959 ctxt->inputTab =
960 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
961 ctxt->inputMax *
962 sizeof(ctxt->inputTab[0]));
963 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000964 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000965 return (0);
966 }
967 }
968 ctxt->inputTab[ctxt->inputNr] = value;
969 ctxt->input = value;
970 return (ctxt->inputNr++);
971}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000972/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 * @ctxt: an XML parser context
975 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000976 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000977 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000979 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000980xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000981inputPop(xmlParserCtxtPtr ctxt)
982{
983 xmlParserInputPtr ret;
984
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000985 if (ctxt == NULL)
986 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000987 if (ctxt->inputNr <= 0)
988 return (0);
989 ctxt->inputNr--;
990 if (ctxt->inputNr > 0)
991 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
992 else
993 ctxt->input = NULL;
994 ret = ctxt->inputTab[ctxt->inputNr];
995 ctxt->inputTab[ctxt->inputNr] = 0;
996 return (ret);
997}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000998/**
999 * nodePush:
1000 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001001 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001002 *
1003 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001004 *
1005 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001006 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001007int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001008nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1009{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001010 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001011 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001012 xmlNodePtr *tmp;
1013
1014 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1015 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001016 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001017 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001018 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001019 return (0);
1020 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001021 ctxt->nodeTab = tmp;
1022 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001023 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001024 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001025 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001026 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1027 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001029 return(0);
1030 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001031 ctxt->nodeTab[ctxt->nodeNr] = value;
1032 ctxt->node = value;
1033 return (ctxt->nodeNr++);
1034}
1035/**
1036 * nodePop:
1037 * @ctxt: an XML parser context
1038 *
1039 * Pops the top element node from the node stack
1040 *
1041 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001042 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001043xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001044nodePop(xmlParserCtxtPtr ctxt)
1045{
1046 xmlNodePtr ret;
1047
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001048 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001049 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001050 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001051 ctxt->nodeNr--;
1052 if (ctxt->nodeNr > 0)
1053 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1054 else
1055 ctxt->node = NULL;
1056 ret = ctxt->nodeTab[ctxt->nodeNr];
1057 ctxt->nodeTab[ctxt->nodeNr] = 0;
1058 return (ret);
1059}
Daniel Veillarda2351322004-06-27 12:08:10 +00001060
1061#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001062/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001063 * nameNsPush:
1064 * @ctxt: an XML parser context
1065 * @value: the element name
1066 * @prefix: the element prefix
1067 * @URI: the element namespace name
1068 *
1069 * Pushes a new element name/prefix/URL on top of the name stack
1070 *
1071 * Returns -1 in case of error, the index in the stack otherwise
1072 */
1073static int
1074nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1075 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1076{
1077 if (ctxt->nameNr >= ctxt->nameMax) {
1078 const xmlChar * *tmp;
1079 void **tmp2;
1080 ctxt->nameMax *= 2;
1081 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1082 ctxt->nameMax *
1083 sizeof(ctxt->nameTab[0]));
1084 if (tmp == NULL) {
1085 ctxt->nameMax /= 2;
1086 goto mem_error;
1087 }
1088 ctxt->nameTab = tmp;
1089 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1090 ctxt->nameMax * 3 *
1091 sizeof(ctxt->pushTab[0]));
1092 if (tmp2 == NULL) {
1093 ctxt->nameMax /= 2;
1094 goto mem_error;
1095 }
1096 ctxt->pushTab = tmp2;
1097 }
1098 ctxt->nameTab[ctxt->nameNr] = value;
1099 ctxt->name = value;
1100 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1101 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001102 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 return (ctxt->nameNr++);
1104mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001105 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001106 return (-1);
1107}
1108/**
1109 * nameNsPop:
1110 * @ctxt: an XML parser context
1111 *
1112 * Pops the top element/prefix/URI name from the name stack
1113 *
1114 * Returns the name just removed
1115 */
1116static const xmlChar *
1117nameNsPop(xmlParserCtxtPtr ctxt)
1118{
1119 const xmlChar *ret;
1120
1121 if (ctxt->nameNr <= 0)
1122 return (0);
1123 ctxt->nameNr--;
1124 if (ctxt->nameNr > 0)
1125 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1126 else
1127 ctxt->name = NULL;
1128 ret = ctxt->nameTab[ctxt->nameNr];
1129 ctxt->nameTab[ctxt->nameNr] = NULL;
1130 return (ret);
1131}
Daniel Veillarda2351322004-06-27 12:08:10 +00001132#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001133
1134/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001135 * namePush:
1136 * @ctxt: an XML parser context
1137 * @value: the element name
1138 *
1139 * Pushes a new element name on top of the name stack
1140 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001142 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001143int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001144namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001145{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001146 if (ctxt == NULL) return (-1);
1147
Daniel Veillard1c732d22002-11-30 11:22:59 +00001148 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001150 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001151 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001152 ctxt->nameMax *
1153 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 if (tmp == NULL) {
1155 ctxt->nameMax /= 2;
1156 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001157 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001159 }
1160 ctxt->nameTab[ctxt->nameNr] = value;
1161 ctxt->name = value;
1162 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001164 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001165 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001166}
1167/**
1168 * namePop:
1169 * @ctxt: an XML parser context
1170 *
1171 * Pops the top element name from the name stack
1172 *
1173 * Returns the name just removed
1174 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001175const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001176namePop(xmlParserCtxtPtr ctxt)
1177{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001178 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001179
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001180 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1181 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001182 ctxt->nameNr--;
1183 if (ctxt->nameNr > 0)
1184 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1185 else
1186 ctxt->name = NULL;
1187 ret = ctxt->nameTab[ctxt->nameNr];
1188 ctxt->nameTab[ctxt->nameNr] = 0;
1189 return (ret);
1190}
Owen Taylor3473f882001-02-23 17:55:21 +00001191
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001192static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001193 if (ctxt->spaceNr >= ctxt->spaceMax) {
1194 ctxt->spaceMax *= 2;
1195 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1196 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1197 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001198 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001199 return(0);
1200 }
1201 }
1202 ctxt->spaceTab[ctxt->spaceNr] = val;
1203 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1204 return(ctxt->spaceNr++);
1205}
1206
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001207static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001208 int ret;
1209 if (ctxt->spaceNr <= 0) return(0);
1210 ctxt->spaceNr--;
1211 if (ctxt->spaceNr > 0)
1212 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1213 else
1214 ctxt->space = NULL;
1215 ret = ctxt->spaceTab[ctxt->spaceNr];
1216 ctxt->spaceTab[ctxt->spaceNr] = -1;
1217 return(ret);
1218}
1219
1220/*
1221 * Macros for accessing the content. Those should be used only by the parser,
1222 * and not exported.
1223 *
1224 * Dirty macros, i.e. one often need to make assumption on the context to
1225 * use them
1226 *
1227 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1228 * To be used with extreme caution since operations consuming
1229 * characters may move the input buffer to a different location !
1230 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1231 * This should be used internally by the parser
1232 * only to compare to ASCII values otherwise it would break when
1233 * running with UTF-8 encoding.
1234 * RAW same as CUR but in the input buffer, bypass any token
1235 * extraction that may have been done
1236 * NXT(n) returns the n'th next xmlChar. Same as CUR is should be used only
1237 * to compare on ASCII based substring.
1238 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001239 * strings without newlines within the parser.
1240 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1241 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001242 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1243 *
1244 * NEXT Skip to the next character, this does the proper decoding
1245 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001246 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001247 * CUR_CHAR(l) returns the current unicode character (int), set l
1248 * to the number of xmlChars used for the encoding [0-5].
1249 * CUR_SCHAR same but operate on a string instead of the context
1250 * COPY_BUF copy the current unicode char to the target buffer, increment
1251 * the index
1252 * GROW, SHRINK handling of input buffers
1253 */
1254
/*
 * Raw input accessors. All of them expect a variable named `ctxt` in the
 * enclosing scope. RAW and CUR expand to the same expression, the byte at
 * the current input position; NXT(val) is the byte val positions further;
 * CUR_PTR is the current input pointer itself.
 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00001255#define RAW (*ctxt->input->cur)
1256#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +00001257#define NXT(val) ctxt->input->cur[(val)]
1258#define CUR_PTR ctxt->input->cur
1259
/*
 * CMPn(s, c1 .. cn): true when the first n bytes at s equal c1 .. cn in
 * order. Each macro builds on the previous one. s is cast to
 * unsigned char * without being parenthesized and is evaluated once per
 * byte compared, so pass a plain pointer expression without side effects.
 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00001260#define CMP4( s, c1, c2, c3, c4 ) \
1261 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1262 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1263#define CMP5( s, c1, c2, c3, c4, c5 ) \
1264 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1265#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1266 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1267#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1268 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1269#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1270 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1271#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1272 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1273 ((unsigned char *) s)[ 8 ] == c9 )
1274#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1275 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1276 ((unsigned char *) s)[ 9 ] == c10 )
1277
/*
 * SKIP(val): advance the current input by val bytes, adding val to
 * nbChars and to the column counter. The line counter is not touched, so
 * the skipped bytes must not contain a newline. Afterwards a '%' at the
 * new position triggers xmlParserHandlePEReference(), and a 0 byte with
 * no more data available from xmlParserInputGrow() pops the input.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00001278#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001279 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +00001280 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001281 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +00001282 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1283 xmlPopInput(ctxt); \
1284 } while (0)
1285
/*
 * SKIPL(val): like SKIP but walks the val bytes one at a time so that
 * newlines update the line counter and reset the column to 1. It declares
 * a local named skipl, and val is used unparenthesized in the loop
 * condition and evaluated on every iteration. The '%' and end-of-input
 * handling after the loop is the same as in SKIP.
 */
Daniel Veillard0b787f32004-03-26 17:29:53 +00001286#define SKIPL(val) do { \
1287 int skipl; \
1288 for(skipl=0; skipl<val; skipl++) { \
1289 if (*(ctxt->input->cur) == '\n') { \
1290 ctxt->input->line++; ctxt->input->col = 1; \
1291 } else ctxt->input->col++; \
1292 ctxt->nbChars++; \
1293 ctxt->input->cur++; \
1294 } \
1295 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1296 if ((*ctxt->input->cur == 0) && \
1297 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1298 xmlPopInput(ctxt); \
1299 } while (0)
1300
/*
 * SHRINK: when not in progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the current position and fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it.
 * The expansion is a bare `if` statement that already ends with ';', so
 * it is not safe as the unbraced body of an if/else.
 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001301#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001302 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1303 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001304 xmlSHRINK (ctxt);
1305
1306static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1307 xmlParserInputShrink(ctxt->input);
1308 if ((*ctxt->input->cur == 0) &&
1309 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1310 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001311 }
Owen Taylor3473f882001-02-23 17:55:21 +00001312
/*
 * GROW: when not in progressive mode, call xmlGROW() once fewer than
 * INPUT_CHUNK bytes remain ahead of the current position.
 * The expansion is a bare `if` statement that already ends with ';', so
 * it is not safe as the unbraced body of an if/else.
 */
Daniel Veillarda880b122003-04-21 21:36:41 +00001313#define GROW if ((ctxt->progressive == 0) && \
1314 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001315 xmlGROW (ctxt);
1316
1317static void xmlGROW (xmlParserCtxtPtr ctxt) {
1318 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1319 if ((*ctxt->input->cur == 0) &&
1320 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1321 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001322}
Owen Taylor3473f882001-02-23 17:55:21 +00001323
1324#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1325
/* NEXT: shorthand for xmlNextChar() on the `ctxt` in scope. */
1326#define NEXT xmlNextChar(ctxt)
1327
/*
 * NEXT1: advance the current input by exactly one byte, bumping the
 * column counter and nbChars. The line counter is not touched. When the
 * new current byte is 0 more input is requested, but the result of
 * xmlParserInputGrow() is ignored.
 * The expansion is a brace block, not a do/while(0) statement.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00001328#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001329 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001330 ctxt->input->cur++; \
1331 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001332 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001333 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1334 }
1335
/*
 * NEXTL(l): advance the current input by l bytes. The byte at the current
 * position is tested first: a newline bumps the line counter and resets
 * the column to 1, anything else adds one column regardless of l. nbChars
 * is not updated. A '%' at the new position triggers
 * xmlParserHandlePEReference(). l is used unparenthesized.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00001336#define NEXTL(l) do { \
1337 if (*(ctxt->input->cur) == '\n') { \
1338 ctxt->input->line++; ctxt->input->col = 1; \
1339 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001340 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001341 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001342 } while (0)
1343
/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): wrappers around xmlCurrentChar() and
 * xmlStringCurrentChar(). l must be an int lvalue, since its address is
 * taken and passed as the last argument.
 */
1344#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1345#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1346
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. When l is 1, v is stored as a single xmlChar; otherwise
 * xmlCopyCharMultiByte() writes it and i grows by its return value.
 * The expansion is an unbraced if/else that relies on the caller's
 * trailing ';', so it is not safe as the unbraced body of another if.
 */
1347#define COPY_BUF(l,b,i,v) \
1348 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001349 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001350
1351/**
1352 * xmlSkipBlankChars:
1353 * @ctxt: the XML parser context
1354 *
1355 * skip all blanks character found at that point in the input streams.
1356 * It pops up finished entities in the process if allowable at that point.
1357 *
1358 * Returns the number of space chars skipped
1359 */
1360
1361int
1362xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001363 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001364
1365 /*
1366 * It's Okay to use CUR/NEXT here since all the blanks are on
1367 * the ASCII range.
1368 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001369 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1370 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001371 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001372 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001373 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001374 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001375 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001376 if (*cur == '\n') {
1377 ctxt->input->line++; ctxt->input->col = 1;
1378 }
1379 cur++;
1380 res++;
1381 if (*cur == 0) {
1382 ctxt->input->cur = cur;
1383 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1384 cur = ctxt->input->cur;
1385 }
1386 }
1387 ctxt->input->cur = cur;
1388 } else {
1389 int cur;
1390 do {
1391 cur = CUR;
1392 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1393 NEXT;
1394 cur = CUR;
1395 res++;
1396 }
1397 while ((cur == 0) && (ctxt->inputNr > 1) &&
1398 (ctxt->instate != XML_PARSER_COMMENT)) {
1399 xmlPopInput(ctxt);
1400 cur = CUR;
1401 }
1402 /*
1403 * Need to handle support of entities branching here
1404 */
1405 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1406 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1407 }
Owen Taylor3473f882001-02-23 17:55:21 +00001408 return(res);
1409}
1410
1411/************************************************************************
1412 * *
1413 * Commodity functions to handle entities *
1414 * *
1415 ************************************************************************/
1416
1417/**
1418 * xmlPopInput:
1419 * @ctxt: an XML parser context
1420 *
1421 * xmlPopInput: the current input pointed by ctxt->input came to an end
1422 * pop it and return the next char.
1423 *
1424 * Returns the current xmlChar in the parser context
1425 */
1426xmlChar
1427xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001428 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001429 if (xmlParserDebugEntities)
1430 xmlGenericError(xmlGenericErrorContext,
1431 "Popping input %d\n", ctxt->inputNr);
1432 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001433 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001434 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1435 return(xmlPopInput(ctxt));
1436 return(CUR);
1437}
1438
1439/**
1440 * xmlPushInput:
1441 * @ctxt: an XML parser context
1442 * @input: an XML parser input fragment (entity, XML fragment ...).
1443 *
1444 * xmlPushInput: switch to a new input stream which is stacked on top
1445 * of the previous one(s).
1446 */
1447void
1448xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1449 if (input == NULL) return;
1450
1451 if (xmlParserDebugEntities) {
1452 if ((ctxt->input != NULL) && (ctxt->input->filename))
1453 xmlGenericError(xmlGenericErrorContext,
1454 "%s(%d): ", ctxt->input->filename,
1455 ctxt->input->line);
1456 xmlGenericError(xmlGenericErrorContext,
1457 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1458 }
1459 inputPush(ctxt, input);
1460 GROW;
1461}
1462
1463/**
1464 * xmlParseCharRef:
1465 * @ctxt: an XML parser context
1466 *
1467 * parse Reference declarations
1468 *
1469 * [66] CharRef ::= '&#' [0-9]+ ';' |
1470 * '&#x' [0-9a-fA-F]+ ';'
1471 *
1472 * [ WFC: Legal Character ]
1473 * Characters referred to using character references must match the
1474 * production for Char.
1475 *
1476 * Returns the value parsed (as an int), 0 in case of error
1477 */
1478int
1479xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001480 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001481 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001482 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001483
Owen Taylor3473f882001-02-23 17:55:21 +00001484 /*
1485 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1486 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001487 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001488 (NXT(2) == 'x')) {
1489 SKIP(3);
1490 GROW;
1491 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001492 if (count++ > 20) {
1493 count = 0;
1494 GROW;
1495 }
1496 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001497 val = val * 16 + (CUR - '0');
1498 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1499 val = val * 16 + (CUR - 'a') + 10;
1500 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1501 val = val * 16 + (CUR - 'A') + 10;
1502 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001504 val = 0;
1505 break;
1506 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001507 if (val > 0x10FFFF)
1508 outofrange = val;
1509
Owen Taylor3473f882001-02-23 17:55:21 +00001510 NEXT;
1511 count++;
1512 }
1513 if (RAW == ';') {
1514 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001515 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001516 ctxt->nbChars ++;
1517 ctxt->input->cur++;
1518 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001519 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001520 SKIP(2);
1521 GROW;
1522 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001523 if (count++ > 20) {
1524 count = 0;
1525 GROW;
1526 }
1527 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001528 val = val * 10 + (CUR - '0');
1529 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001530 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 val = 0;
1532 break;
1533 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001534 if (val > 0x10FFFF)
1535 outofrange = val;
1536
Owen Taylor3473f882001-02-23 17:55:21 +00001537 NEXT;
1538 count++;
1539 }
1540 if (RAW == ';') {
1541 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001542 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001543 ctxt->nbChars ++;
1544 ctxt->input->cur++;
1545 }
1546 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001547 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001548 }
1549
1550 /*
1551 * [ WFC: Legal Character ]
1552 * Characters referred to using character references must match the
1553 * production for Char.
1554 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001555 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001556 return(val);
1557 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001558 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1559 "xmlParseCharRef: invalid xmlChar value %d\n",
1560 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001561 }
1562 return(0);
1563}
1564
1565/**
1566 * xmlParseStringCharRef:
1567 * @ctxt: an XML parser context
1568 * @str: a pointer to an index in the string
1569 *
1570 * parse Reference declarations, variant parsing from a string rather
1571 * than an an input flow.
1572 *
1573 * [66] CharRef ::= '&#' [0-9]+ ';' |
1574 * '&#x' [0-9a-fA-F]+ ';'
1575 *
1576 * [ WFC: Legal Character ]
1577 * Characters referred to using character references must match the
1578 * production for Char.
1579 *
1580 * Returns the value parsed (as an int), 0 in case of error, str will be
1581 * updated to the current value of the index
1582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001583static int
Owen Taylor3473f882001-02-23 17:55:21 +00001584xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1585 const xmlChar *ptr;
1586 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001587 unsigned int val = 0;
1588 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001589
1590 if ((str == NULL) || (*str == NULL)) return(0);
1591 ptr = *str;
1592 cur = *ptr;
1593 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1594 ptr += 3;
1595 cur = *ptr;
1596 while (cur != ';') { /* Non input consuming loop */
1597 if ((cur >= '0') && (cur <= '9'))
1598 val = val * 16 + (cur - '0');
1599 else if ((cur >= 'a') && (cur <= 'f'))
1600 val = val * 16 + (cur - 'a') + 10;
1601 else if ((cur >= 'A') && (cur <= 'F'))
1602 val = val * 16 + (cur - 'A') + 10;
1603 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001604 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001605 val = 0;
1606 break;
1607 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001608 if (val > 0x10FFFF)
1609 outofrange = val;
1610
Owen Taylor3473f882001-02-23 17:55:21 +00001611 ptr++;
1612 cur = *ptr;
1613 }
1614 if (cur == ';')
1615 ptr++;
1616 } else if ((cur == '&') && (ptr[1] == '#')){
1617 ptr += 2;
1618 cur = *ptr;
1619 while (cur != ';') { /* Non input consuming loops */
1620 if ((cur >= '0') && (cur <= '9'))
1621 val = val * 10 + (cur - '0');
1622 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001624 val = 0;
1625 break;
1626 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001627 if (val > 0x10FFFF)
1628 outofrange = val;
1629
Owen Taylor3473f882001-02-23 17:55:21 +00001630 ptr++;
1631 cur = *ptr;
1632 }
1633 if (cur == ';')
1634 ptr++;
1635 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001636 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001637 return(0);
1638 }
1639 *str = ptr;
1640
1641 /*
1642 * [ WFC: Legal Character ]
1643 * Characters referred to using character references must match the
1644 * production for Char.
1645 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001646 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001647 return(val);
1648 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001649 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1650 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1651 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 }
1653 return(0);
1654}
1655
1656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001657 * xmlNewBlanksWrapperInputStream:
1658 * @ctxt: an XML parser context
1659 * @entity: an Entity pointer
1660 *
1661 * Create a new input stream for wrapping
1662 * blanks around a PEReference
1663 *
1664 * Returns the new input stream or NULL
1665 */
1666
1667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1668
Daniel Veillardf4862f02002-09-10 11:13:43 +00001669static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1671 xmlParserInputPtr input;
1672 xmlChar *buffer;
1673 size_t length;
1674 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001675 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1676 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001677 return(NULL);
1678 }
1679 if (xmlParserDebugEntities)
1680 xmlGenericError(xmlGenericErrorContext,
1681 "new blanks wrapper for entity: %s\n", entity->name);
1682 input = xmlNewInputStream(ctxt);
1683 if (input == NULL) {
1684 return(NULL);
1685 }
1686 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001687 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001688 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001689 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001690 return(NULL);
1691 }
1692 buffer [0] = ' ';
1693 buffer [1] = '%';
1694 buffer [length-3] = ';';
1695 buffer [length-2] = ' ';
1696 buffer [length-1] = 0;
1697 memcpy(buffer + 2, entity->name, length - 5);
1698 input->free = deallocblankswrapper;
1699 input->base = buffer;
1700 input->cur = buffer;
1701 input->length = length;
1702 input->end = &buffer[length];
1703 return(input);
1704}
1705
1706/**
Owen Taylor3473f882001-02-23 17:55:21 +00001707 * xmlParserHandlePEReference:
1708 * @ctxt: the parser context
1709 *
1710 * [69] PEReference ::= '%' Name ';'
1711 *
1712 * [ WFC: No Recursion ]
1713 * A parsed entity must not contain a recursive
1714 * reference to itself, either directly or indirectly.
1715 *
1716 * [ WFC: Entity Declared ]
1717 * In a document without any DTD, a document with only an internal DTD
1718 * subset which contains no parameter entity references, or a document
1719 * with "standalone='yes'", ... ... The declaration of a parameter
1720 * entity must precede any reference to it...
1721 *
1722 * [ VC: Entity Declared ]
1723 * In a document with an external subset or external parameter entities
1724 * with "standalone='no'", ... ... The declaration of a parameter entity
1725 * must precede any reference to it...
1726 *
1727 * [ WFC: In DTD ]
1728 * Parameter-entity references may only appear in the DTD.
1729 * NOTE: misleading but this is handled.
1730 *
1731 * A PEReference may have been detected in the current input stream
1732 * the handling is done accordingly to
1733 * http://www.w3.org/TR/REC-xml#entproc
1734 * i.e.
1735 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001736 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001737 */
1738void
1739xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001741 xmlEntityPtr entity = NULL;
1742 xmlParserInputPtr input;
1743
Owen Taylor3473f882001-02-23 17:55:21 +00001744 if (RAW != '%') return;
1745 switch(ctxt->instate) {
1746 case XML_PARSER_CDATA_SECTION:
1747 return;
1748 case XML_PARSER_COMMENT:
1749 return;
1750 case XML_PARSER_START_TAG:
1751 return;
1752 case XML_PARSER_END_TAG:
1753 return;
1754 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001755 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001756 return;
1757 case XML_PARSER_PROLOG:
1758 case XML_PARSER_START:
1759 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001760 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return;
1762 case XML_PARSER_ENTITY_DECL:
1763 case XML_PARSER_CONTENT:
1764 case XML_PARSER_ATTRIBUTE_VALUE:
1765 case XML_PARSER_PI:
1766 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001767 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001768 /* we just ignore it there */
1769 return;
1770 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001771 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001772 return;
1773 case XML_PARSER_ENTITY_VALUE:
1774 /*
1775 * NOTE: in the case of entity values, we don't do the
1776 * substitution here since we need the literal
1777 * entity value to be able to save the internal
1778 * subset of the document.
1779 * This will be handled by xmlStringDecodeEntities
1780 */
1781 return;
1782 case XML_PARSER_DTD:
1783 /*
1784 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1785 * In the internal DTD subset, parameter-entity references
1786 * can occur only where markup declarations can occur, not
1787 * within markup declarations.
1788 * In that case this is handled in xmlParseMarkupDecl
1789 */
1790 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1791 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001792 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001793 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001794 break;
1795 case XML_PARSER_IGNORE:
1796 return;
1797 }
1798
1799 NEXT;
1800 name = xmlParseName(ctxt);
1801 if (xmlParserDebugEntities)
1802 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001803 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001804 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001805 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001806 } else {
1807 if (RAW == ';') {
1808 NEXT;
1809 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1810 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1811 if (entity == NULL) {
1812
1813 /*
1814 * [ WFC: Entity Declared ]
1815 * In a document without any DTD, a document with only an
1816 * internal DTD subset which contains no parameter entity
1817 * references, or a document with "standalone='yes'", ...
1818 * ... The declaration of a parameter entity must precede
1819 * any reference to it...
1820 */
1821 if ((ctxt->standalone == 1) ||
1822 ((ctxt->hasExternalSubset == 0) &&
1823 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001824 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001825 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001826 } else {
1827 /*
1828 * [ VC: Entity Declared ]
1829 * In a document with an external subset or external
1830 * parameter entities with "standalone='no'", ...
1831 * ... The declaration of a parameter entity must precede
1832 * any reference to it...
1833 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001834 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1835 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1836 "PEReference: %%%s; not found\n",
1837 name);
1838 } else
1839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1840 "PEReference: %%%s; not found\n",
1841 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 ctxt->valid = 0;
1843 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001844 } else if (ctxt->input->free != deallocblankswrapper) {
1845 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1846 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001847 } else {
1848 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1849 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001850 xmlChar start[4];
1851 xmlCharEncoding enc;
1852
Owen Taylor3473f882001-02-23 17:55:21 +00001853 /*
1854 * handle the extra spaces added before and after
1855 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001856 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001857 */
1858 input = xmlNewEntityInputStream(ctxt, entity);
1859 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001860
1861 /*
1862 * Get the 4 first bytes and decode the charset
1863 * if enc != XML_CHAR_ENCODING_NONE
1864 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001865 * Note that, since we may have some non-UTF8
1866 * encoding (like UTF16, bug 135229), the 'length'
1867 * is not known, but we can calculate based upon
1868 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001869 */
1870 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001871 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001872 start[0] = RAW;
1873 start[1] = NXT(1);
1874 start[2] = NXT(2);
1875 start[3] = NXT(3);
1876 enc = xmlDetectCharEncoding(start, 4);
1877 if (enc != XML_CHAR_ENCODING_NONE) {
1878 xmlSwitchEncoding(ctxt, enc);
1879 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001880 }
1881
Owen Taylor3473f882001-02-23 17:55:21 +00001882 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001883 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1884 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001885 xmlParseTextDecl(ctxt);
1886 }
Owen Taylor3473f882001-02-23 17:55:21 +00001887 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001888 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1889 "PEReference: %s is not a parameter entity\n",
1890 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001891 }
1892 }
1893 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
Owen Taylor3473f882001-02-23 17:55:21 +00001896 }
1897}
1898
/*
 * Macro used to grow the current buffer.
 *
 * The capacity of @buffer is expected in the companion variable
 * <buffer>_size, which is doubled before the buffer is reallocated to
 * that size. If the reallocation fails the macro jumps to the mem_error
 * label that the enclosing function must provide; @buffer then still
 * holds its previous value while <buffer>_size has already been doubled.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    tmp = (xmlChar *) xmlRealloc(buffer,                                \
                                 buffer##_size * sizeof(xmlChar));      \
    if (tmp == NULL)                                                    \
        goto mem_error;                                                 \
    buffer = tmp;                                                       \
}
1910
1911/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001912 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001913 * @ctxt: the parser context
1914 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001916 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1917 * @end: an end marker xmlChar, 0 if none
1918 * @end2: an end marker xmlChar, 0 if none
1919 * @end3: an end marker xmlChar, 0 if none
1920 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001921 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001922 *
1923 * [67] Reference ::= EntityRef | CharRef
1924 *
1925 * [69] PEReference ::= '%' Name ';'
1926 *
1927 * Returns A newly allocated string with the substitution done. The caller
1928 * must deallocate it !
1929 */
1930xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1932 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001933 xmlChar *buffer = NULL;
1934 int buffer_size = 0;
1935
1936 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001937 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001938 xmlEntityPtr ent;
1939 int c,l;
1940 int nbchars = 0;
1941
Daniel Veillarda82b1822004-11-08 16:24:57 +00001942 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001943 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001944 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001945
1946 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001947 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001948 return(NULL);
1949 }
1950
1951 /*
1952 * allocate a translation buffer.
1953 */
1954 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001955 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001956 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001957
1958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001959 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001960 * we are operating on already parsed values.
1961 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001962 if (str < last)
1963 c = CUR_SCHAR(str, l);
1964 else
1965 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001966 while ((c != 0) && (c != end) && /* non input consuming loop */
1967 (c != end2) && (c != end3)) {
1968
1969 if (c == 0) break;
1970 if ((c == '&') && (str[1] == '#')) {
1971 int val = xmlParseStringCharRef(ctxt, &str);
1972 if (val != 0) {
1973 COPY_BUF(0,buffer,nbchars,val);
1974 }
1975 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1976 if (xmlParserDebugEntities)
1977 xmlGenericError(xmlGenericErrorContext,
1978 "String decoding Entity Reference: %.30s\n",
1979 str);
1980 ent = xmlParseStringEntityRef(ctxt, &str);
1981 if ((ent != NULL) &&
1982 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1983 if (ent->content != NULL) {
1984 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1985 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001986 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1987 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001988 }
1989 } else if ((ent != NULL) && (ent->content != NULL)) {
1990 xmlChar *rep;
1991
1992 ctxt->depth++;
1993 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1994 0, 0, 0);
1995 ctxt->depth--;
1996 if (rep != NULL) {
1997 current = rep;
1998 while (*current != 0) { /* non input consuming loop */
1999 buffer[nbchars++] = *current++;
2000 if (nbchars >
2001 buffer_size - XML_PARSER_BUFFER_SIZE) {
2002 growBuffer(buffer);
2003 }
2004 }
2005 xmlFree(rep);
2006 }
2007 } else if (ent != NULL) {
2008 int i = xmlStrlen(ent->name);
2009 const xmlChar *cur = ent->name;
2010
2011 buffer[nbchars++] = '&';
2012 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2013 growBuffer(buffer);
2014 }
2015 for (;i > 0;i--)
2016 buffer[nbchars++] = *cur++;
2017 buffer[nbchars++] = ';';
2018 }
2019 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2020 if (xmlParserDebugEntities)
2021 xmlGenericError(xmlGenericErrorContext,
2022 "String decoding PE Reference: %.30s\n", str);
2023 ent = xmlParseStringPEReference(ctxt, &str);
2024 if (ent != NULL) {
2025 xmlChar *rep;
2026
2027 ctxt->depth++;
2028 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2029 0, 0, 0);
2030 ctxt->depth--;
2031 if (rep != NULL) {
2032 current = rep;
2033 while (*current != 0) { /* non input consuming loop */
2034 buffer[nbchars++] = *current++;
2035 if (nbchars >
2036 buffer_size - XML_PARSER_BUFFER_SIZE) {
2037 growBuffer(buffer);
2038 }
2039 }
2040 xmlFree(rep);
2041 }
2042 }
2043 } else {
2044 COPY_BUF(l,buffer,nbchars,c);
2045 str += l;
2046 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2047 growBuffer(buffer);
2048 }
2049 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002050 if (str < last)
2051 c = CUR_SCHAR(str, l);
2052 else
2053 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002054 }
2055 buffer[nbchars++] = 0;
2056 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002057
2058mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002059 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002060 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002061}
2062
Daniel Veillarde57ec792003-09-10 10:50:59 +00002063/**
2064 * xmlStringDecodeEntities:
2065 * @ctxt: the parser context
2066 * @str: the input string
2067 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2068 * @end: an end marker xmlChar, 0 if none
2069 * @end2: an end marker xmlChar, 0 if none
2070 * @end3: an end marker xmlChar, 0 if none
2071 *
2072 * Takes a entity string content and process to do the adequate substitutions.
2073 *
2074 * [67] Reference ::= EntityRef | CharRef
2075 *
2076 * [69] PEReference ::= '%' Name ';'
2077 *
2078 * Returns A newly allocated string with the substitution done. The caller
2079 * must deallocate it !
2080 */
2081xmlChar *
2082xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2083 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002084 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002085 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2086 end, end2, end3));
2087}
Owen Taylor3473f882001-02-23 17:55:21 +00002088
2089/************************************************************************
2090 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002091 * Commodity functions, cleanup needed ? *
2092 * *
2093 ************************************************************************/
2094
2095/**
2096 * areBlanks:
2097 * @ctxt: an XML parser context
2098 * @str: a xmlChar *
2099 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002100 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002101 *
2102 * Is this a sequence of blank chars that one can ignore ?
2103 *
2104 * Returns 1 if ignorable 0 otherwise.
2105 */
2106
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002107static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2108 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002109 int i, ret;
2110 xmlNodePtr lastChild;
2111
Daniel Veillard05c13a22001-09-09 08:38:09 +00002112 /*
2113 * Don't spend time trying to differentiate them, the same callback is
2114 * used !
2115 */
2116 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002117 return(0);
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 /*
2120 * Check for xml:space value.
2121 */
2122 if (*(ctxt->space) == 1)
2123 return(0);
2124
2125 /*
2126 * Check that the string is made of blanks
2127 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002128 if (blank_chars == 0) {
2129 for (i = 0;i < len;i++)
2130 if (!(IS_BLANK_CH(str[i]))) return(0);
2131 }
Owen Taylor3473f882001-02-23 17:55:21 +00002132
2133 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002134 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002135 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002136 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002137 if (ctxt->myDoc != NULL) {
2138 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2139 if (ret == 0) return(1);
2140 if (ret == 1) return(0);
2141 }
2142
2143 /*
2144 * Otherwise, heuristic :-\
2145 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002146 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002147 if ((ctxt->node->children == NULL) &&
2148 (RAW == '<') && (NXT(1) == '/')) return(0);
2149
2150 lastChild = xmlGetLastChild(ctxt->node);
2151 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002152 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2153 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002154 } else if (xmlNodeIsText(lastChild))
2155 return(0);
2156 else if ((ctxt->node->children != NULL) &&
2157 (xmlNodeIsText(ctxt->node->children)))
2158 return(0);
2159 return(1);
2160}
2161
Owen Taylor3473f882001-02-23 17:55:21 +00002162/************************************************************************
2163 * *
2164 * Extra stuff for namespace support *
2165 * Relates to http://www.w3.org/TR/WD-xml-names *
2166 * *
2167 ************************************************************************/
2168
2169/**
2170 * xmlSplitQName:
2171 * @ctxt: an XML parser context
2172 * @name: an XML parser context
2173 * @prefix: a xmlChar **
2174 *
2175 * parse an UTF8 encoded XML qualified name string
2176 *
2177 * [NS 5] QName ::= (Prefix ':')? LocalPart
2178 *
2179 * [NS 6] Prefix ::= NCName
2180 *
2181 * [NS 7] LocalPart ::= NCName
2182 *
2183 * Returns the local part, and prefix is updated
2184 * to get the Prefix if any.
2185 */
2186
2187xmlChar *
2188xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2189 xmlChar buf[XML_MAX_NAMELEN + 5];
2190 xmlChar *buffer = NULL;
2191 int len = 0;
2192 int max = XML_MAX_NAMELEN;
2193 xmlChar *ret = NULL;
2194 const xmlChar *cur = name;
2195 int c;
2196
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002197 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002198 *prefix = NULL;
2199
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002200 if (cur == NULL) return(NULL);
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202#ifndef XML_XML_NAMESPACE
2203 /* xml: prefix is not really a namespace */
2204 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2205 (cur[2] == 'l') && (cur[3] == ':'))
2206 return(xmlStrdup(name));
2207#endif
2208
Daniel Veillard597bc482003-07-24 16:08:28 +00002209 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002210 if (cur[0] == ':')
2211 return(xmlStrdup(name));
2212
2213 c = *cur++;
2214 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2215 buf[len++] = c;
2216 c = *cur++;
2217 }
2218 if (len >= max) {
2219 /*
2220 * Okay someone managed to make a huge name, so he's ready to pay
2221 * for the processing speed.
2222 */
2223 max = len * 2;
2224
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002225 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002226 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002227 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002228 return(NULL);
2229 }
2230 memcpy(buffer, buf, len);
2231 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2232 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002233 xmlChar *tmp;
2234
Owen Taylor3473f882001-02-23 17:55:21 +00002235 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002236 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002237 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002238 if (tmp == NULL) {
2239 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002241 return(NULL);
2242 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002243 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002244 }
2245 buffer[len++] = c;
2246 c = *cur++;
2247 }
2248 buffer[len] = 0;
2249 }
2250
Daniel Veillard597bc482003-07-24 16:08:28 +00002251 /* nasty but well=formed
2252 if ((c == ':') && (*cur == 0)) {
2253 return(xmlStrdup(name));
2254 } */
2255
Owen Taylor3473f882001-02-23 17:55:21 +00002256 if (buffer == NULL)
2257 ret = xmlStrndup(buf, len);
2258 else {
2259 ret = buffer;
2260 buffer = NULL;
2261 max = XML_MAX_NAMELEN;
2262 }
2263
2264
2265 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002266 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002268 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002269 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 len = 0;
2272
Daniel Veillardbb284f42002-10-16 18:02:47 +00002273 /*
2274 * Check that the first character is proper to start
2275 * a new name
2276 */
2277 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2278 ((c >= 0x41) && (c <= 0x5A)) ||
2279 (c == '_') || (c == ':'))) {
2280 int l;
2281 int first = CUR_SCHAR(cur, l);
2282
2283 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002284 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002285 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002286 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002287 }
2288 }
2289 cur++;
2290
Owen Taylor3473f882001-02-23 17:55:21 +00002291 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2292 buf[len++] = c;
2293 c = *cur++;
2294 }
2295 if (len >= max) {
2296 /*
2297 * Okay someone managed to make a huge name, so he's ready to pay
2298 * for the processing speed.
2299 */
2300 max = len * 2;
2301
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002302 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002303 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002304 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002305 return(NULL);
2306 }
2307 memcpy(buffer, buf, len);
2308 while (c != 0) { /* tested bigname2.xml */
2309 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002310 xmlChar *tmp;
2311
Owen Taylor3473f882001-02-23 17:55:21 +00002312 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002313 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002314 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002315 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002316 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002317 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002318 return(NULL);
2319 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002320 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002321 }
2322 buffer[len++] = c;
2323 c = *cur++;
2324 }
2325 buffer[len] = 0;
2326 }
2327
2328 if (buffer == NULL)
2329 ret = xmlStrndup(buf, len);
2330 else {
2331 ret = buffer;
2332 }
2333 }
2334
2335 return(ret);
2336}
2337
2338/************************************************************************
2339 * *
2340 * The parser itself *
2341 * Relates to http://www.w3.org/TR/REC-xml *
2342 * *
2343 ************************************************************************/
2344
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002345static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002346static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002347 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002348
Owen Taylor3473f882001-02-23 17:55:21 +00002349/**
2350 * xmlParseName:
2351 * @ctxt: an XML parser context
2352 *
2353 * parse an XML name.
2354 *
2355 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2356 * CombiningChar | Extender
2357 *
2358 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2359 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002360 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002361 *
2362 * Returns the Name parsed or NULL
2363 */
2364
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002365const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002366xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002367 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002368 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002369 int count = 0;
2370
2371 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002372
2373 /*
2374 * Accelerator for simple ASCII names
2375 */
2376 in = ctxt->input->cur;
2377 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2378 ((*in >= 0x41) && (*in <= 0x5A)) ||
2379 (*in == '_') || (*in == ':')) {
2380 in++;
2381 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2382 ((*in >= 0x41) && (*in <= 0x5A)) ||
2383 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002384 (*in == '_') || (*in == '-') ||
2385 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002386 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002387 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002388 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002390 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002391 ctxt->nbChars += count;
2392 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002393 if (ret == NULL)
2394 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002395 return(ret);
2396 }
2397 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002398 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002399}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002400
Daniel Veillard46de64e2002-05-29 08:21:33 +00002401/**
2402 * xmlParseNameAndCompare:
2403 * @ctxt: an XML parser context
2404 *
2405 * parse an XML name and compares for match
2406 * (specialized for endtag parsing)
2407 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002408 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2409 * and the name for mismatch
2410 */
2411
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002412static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002413xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002414 register const xmlChar *cmp = other;
2415 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002416 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002417
2418 GROW;
2419
2420 in = ctxt->input->cur;
2421 while (*in != 0 && *in == *cmp) {
2422 ++in;
2423 ++cmp;
2424 }
William M. Brack76e95df2003-10-18 16:20:14 +00002425 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 /* success */
2427 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002429 }
2430 /* failure (or end of input buffer), check with full function */
2431 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002432 /* strings coming from the dictionnary direct compare possible */
2433 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002434 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002435 }
2436 return ret;
2437}
2438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002439static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002440xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441 int len = 0, l;
2442 int c;
2443 int count = 0;
2444
2445 /*
2446 * Handler for more complex cases
2447 */
2448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 c = CUR_CHAR(l);
2450 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2451 (!IS_LETTER(c) && (c != '_') &&
2452 (c != ':'))) {
2453 return(NULL);
2454 }
2455
2456 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002457 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 if (count++ > 100) {
2463 count = 0;
2464 GROW;
2465 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002466 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002467 NEXTL(l);
2468 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002469 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002470 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002471}
2472
2473/**
2474 * xmlParseStringName:
2475 * @ctxt: an XML parser context
2476 * @str: a pointer to the string pointer (IN/OUT)
2477 *
2478 * parse an XML name.
2479 *
2480 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2481 * CombiningChar | Extender
2482 *
2483 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2484 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002485 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002486 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002487 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * is updated to the current location in the string.
2489 */
2490
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002491static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002492xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2493 xmlChar buf[XML_MAX_NAMELEN + 5];
2494 const xmlChar *cur = *str;
2495 int len = 0, l;
2496 int c;
2497
2498 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002499 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002500 (c != ':')) {
2501 return(NULL);
2502 }
2503
William M. Brack871611b2003-10-18 04:53:14 +00002504 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002505 (c == '.') || (c == '-') ||
2506 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002507 (IS_COMBINING(c)) ||
2508 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002509 COPY_BUF(l,buf,len,c);
2510 cur += l;
2511 c = CUR_SCHAR(cur, l);
2512 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2513 /*
2514 * Okay someone managed to make a huge name, so he's ready to pay
2515 * for the processing speed.
2516 */
2517 xmlChar *buffer;
2518 int max = len * 2;
2519
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002520 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002522 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002523 return(NULL);
2524 }
2525 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002526 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002527 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002528 (c == '.') || (c == '-') ||
2529 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002530 (IS_COMBINING(c)) ||
2531 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002533 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002534 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002535 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002536 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002537 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002538 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002540 return(NULL);
2541 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002542 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002543 }
2544 COPY_BUF(l,buffer,len,c);
2545 cur += l;
2546 c = CUR_SCHAR(cur, l);
2547 }
2548 buffer[len] = 0;
2549 *str = cur;
2550 return(buffer);
2551 }
2552 }
2553 *str = cur;
2554 return(xmlStrndup(buf, len));
2555}
2556
2557/**
2558 * xmlParseNmtoken:
2559 * @ctxt: an XML parser context
2560 *
2561 * parse an XML Nmtoken.
2562 *
2563 * [7] Nmtoken ::= (NameChar)+
2564 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002565 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002566 *
2567 * Returns the Nmtoken parsed or NULL
2568 */
2569
2570xmlChar *
2571xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2572 xmlChar buf[XML_MAX_NAMELEN + 5];
2573 int len = 0, l;
2574 int c;
2575 int count = 0;
2576
2577 GROW;
2578 c = CUR_CHAR(l);
2579
William M. Brack871611b2003-10-18 04:53:14 +00002580 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002581 (c == '.') || (c == '-') ||
2582 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002583 (IS_COMBINING(c)) ||
2584 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002585 if (count++ > 100) {
2586 count = 0;
2587 GROW;
2588 }
2589 COPY_BUF(l,buf,len,c);
2590 NEXTL(l);
2591 c = CUR_CHAR(l);
2592 if (len >= XML_MAX_NAMELEN) {
2593 /*
2594 * Okay someone managed to make a huge token, so he's ready to pay
2595 * for the processing speed.
2596 */
2597 xmlChar *buffer;
2598 int max = len * 2;
2599
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
2604 }
2605 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002606 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002607 (c == '.') || (c == '-') ||
2608 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002609 (IS_COMBINING(c)) ||
2610 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002611 if (count++ > 100) {
2612 count = 0;
2613 GROW;
2614 }
2615 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002616 xmlChar *tmp;
2617
Owen Taylor3473f882001-02-23 17:55:21 +00002618 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002619 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002620 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002621 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002622 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002624 return(NULL);
2625 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002626 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 }
2628 COPY_BUF(l,buffer,len,c);
2629 NEXTL(l);
2630 c = CUR_CHAR(l);
2631 }
2632 buffer[len] = 0;
2633 return(buffer);
2634 }
2635 }
2636 if (len == 0)
2637 return(NULL);
2638 return(xmlStrndup(buf, len));
2639}
2640
2641/**
2642 * xmlParseEntityValue:
2643 * @ctxt: an XML parser context
2644 * @orig: if non-NULL store a copy of the original entity value
2645 *
2646 * parse a value for ENTITY declarations
2647 *
2648 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2649 * "'" ([^%&'] | PEReference | Reference)* "'"
2650 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002651 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002652 */
2653
2654xmlChar *
2655xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2656 xmlChar *buf = NULL;
2657 int len = 0;
2658 int size = XML_PARSER_BUFFER_SIZE;
2659 int c, l;
2660 xmlChar stop;
2661 xmlChar *ret = NULL;
2662 const xmlChar *cur = NULL;
2663 xmlParserInputPtr input;
2664
2665 if (RAW == '"') stop = '"';
2666 else if (RAW == '\'') stop = '\'';
2667 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002668 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002669 return(NULL);
2670 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676
2677 /*
2678 * The content of the entity definition is copied in a buffer.
2679 */
2680
2681 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2682 input = ctxt->input;
2683 GROW;
2684 NEXT;
2685 c = CUR_CHAR(l);
2686 /*
2687 * NOTE: 4.4.5 Included in Literal
2688 * When a parameter entity reference appears in a literal entity
2689 * value, ... a single or double quote character in the replacement
2690 * text is always treated as a normal data character and will not
2691 * terminate the literal.
2692 * In practice it means we stop the loop only when back at parsing
2693 * the initial entity and the quote is found
2694 */
William M. Brack871611b2003-10-18 04:53:14 +00002695 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002696 (ctxt->input != input))) {
2697 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002698 xmlChar *tmp;
2699
Owen Taylor3473f882001-02-23 17:55:21 +00002700 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002701 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2702 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002703 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002704 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 return(NULL);
2706 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002707 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002708 }
2709 COPY_BUF(l,buf,len,c);
2710 NEXTL(l);
2711 /*
2712 * Pop-up of finished entities.
2713 */
2714 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2715 xmlPopInput(ctxt);
2716
2717 GROW;
2718 c = CUR_CHAR(l);
2719 if (c == 0) {
2720 GROW;
2721 c = CUR_CHAR(l);
2722 }
2723 }
2724 buf[len] = 0;
2725
2726 /*
2727 * Raise problem w.r.t. '&' and '%' being used in non-entities
2728 * reference constructs. Note Charref will be handled in
2729 * xmlStringDecodeEntities()
2730 */
2731 cur = buf;
2732 while (*cur != 0) { /* non input consuming */
2733 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2734 xmlChar *name;
2735 xmlChar tmp = *cur;
2736
2737 cur++;
2738 name = xmlParseStringName(ctxt, &cur);
2739 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002740 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002741 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002742 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002743 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002744 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2745 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
2748 if (name != NULL)
2749 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002750 if (*cur == 0)
2751 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 cur++;
2754 }
2755
2756 /*
2757 * Then PEReference entities are substituted.
2758 */
2759 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 xmlFree(buf);
2762 } else {
2763 NEXT;
2764 /*
2765 * NOTE: 4.4.7 Bypassed
2766 * When a general entity reference appears in the EntityValue in
2767 * an entity declaration, it is bypassed and left as is.
2768 * so XML_SUBSTITUTE_REF is not set here.
2769 */
2770 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2771 0, 0, 0);
2772 if (orig != NULL)
2773 *orig = buf;
2774 else
2775 xmlFree(buf);
2776 }
2777
2778 return(ret);
2779}
2780
2781/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002782 * xmlParseAttValueComplex:
2783 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002784 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002786 *
2787 * parse a value for an attribute, this is the fallback function
2788 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002789 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002790 *
2791 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2792 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002793static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002794xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002795 xmlChar limit = 0;
2796 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002797 int len = 0;
2798 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002799 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002800 xmlChar *current = NULL;
2801 xmlEntityPtr ent;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (NXT(0) == '"') {
2804 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2805 limit = '"';
2806 NEXT;
2807 } else if (NXT(0) == '\'') {
2808 limit = '\'';
2809 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2810 NEXT;
2811 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
2815
2816 /*
2817 * allocate a translation buffer.
2818 */
2819 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002820 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002821 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002822
2823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002824 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002825 */
2826 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002827 while ((NXT(0) != limit) && /* checked */
2828 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002829 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002830 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002831 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (NXT(1) == '#') {
2833 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002834
Owen Taylor3473f882001-02-23 17:55:21 +00002835 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002836 if (ctxt->replaceEntities) {
2837 if (len > buf_size - 10) {
2838 growBuffer(buf);
2839 }
2840 buf[len++] = '&';
2841 } else {
2842 /*
2843 * The reparsing will be done in xmlStringGetNodeList()
2844 * called by the attribute() function in SAX.c
2845 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002846 if (len > buf_size - 10) {
2847 growBuffer(buf);
2848 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002849 buf[len++] = '&';
2850 buf[len++] = '#';
2851 buf[len++] = '3';
2852 buf[len++] = '8';
2853 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002854 }
2855 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002856 if (len > buf_size - 10) {
2857 growBuffer(buf);
2858 }
Owen Taylor3473f882001-02-23 17:55:21 +00002859 len += xmlCopyChar(0, &buf[len], val);
2860 }
2861 } else {
2862 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002863 if ((ent != NULL) &&
2864 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2865 if (len > buf_size - 10) {
2866 growBuffer(buf);
2867 }
2868 if ((ctxt->replaceEntities == 0) &&
2869 (ent->content[0] == '&')) {
2870 buf[len++] = '&';
2871 buf[len++] = '#';
2872 buf[len++] = '3';
2873 buf[len++] = '8';
2874 buf[len++] = ';';
2875 } else {
2876 buf[len++] = ent->content[0];
2877 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002878 } else if ((ent != NULL) &&
2879 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002880 xmlChar *rep;
2881
2882 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2883 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002884 XML_SUBSTITUTE_REF,
2885 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (rep != NULL) {
2887 current = rep;
2888 while (*current != 0) { /* non input consuming */
2889 buf[len++] = *current++;
2890 if (len > buf_size - 10) {
2891 growBuffer(buf);
2892 }
2893 }
2894 xmlFree(rep);
2895 }
2896 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002897 if (len > buf_size - 10) {
2898 growBuffer(buf);
2899 }
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (ent->content != NULL)
2901 buf[len++] = ent->content[0];
2902 }
2903 } else if (ent != NULL) {
2904 int i = xmlStrlen(ent->name);
2905 const xmlChar *cur = ent->name;
2906
2907 /*
2908 * This may look absurd but is needed to detect
2909 * entities problems
2910 */
2911 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2912 (ent->content != NULL)) {
2913 xmlChar *rep;
2914 rep = xmlStringDecodeEntities(ctxt, ent->content,
2915 XML_SUBSTITUTE_REF, 0, 0, 0);
2916 if (rep != NULL)
2917 xmlFree(rep);
2918 }
2919
2920 /*
2921 * Just output the reference
2922 */
2923 buf[len++] = '&';
2924 if (len > buf_size - i - 10) {
2925 growBuffer(buf);
2926 }
2927 for (;i > 0;i--)
2928 buf[len++] = *cur++;
2929 buf[len++] = ';';
2930 }
2931 }
2932 } else {
2933 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002934 if ((len != 0) || (!normalize)) {
2935 if ((!normalize) || (!in_space)) {
2936 COPY_BUF(l,buf,len,0x20);
2937 if (len > buf_size - 10) {
2938 growBuffer(buf);
2939 }
2940 }
2941 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002942 }
2943 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002944 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002945 COPY_BUF(l,buf,len,c);
2946 if (len > buf_size - 10) {
2947 growBuffer(buf);
2948 }
2949 }
2950 NEXTL(l);
2951 }
2952 GROW;
2953 c = CUR_CHAR(l);
2954 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002955 if ((in_space) && (normalize)) {
2956 while (buf[len - 1] == 0x20) len--;
2957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002958 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002962 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2963 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002964 } else
2965 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002966 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002968
2969mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002970 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002971 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002972}
2973
2974/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002975 * xmlParseAttValue:
2976 * @ctxt: an XML parser context
2977 *
2978 * parse a value for an attribute
2979 * Note: the parser won't do substitution of entities here, this
2980 * will be handled later in xmlStringGetNodeList
2981 *
2982 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2983 * "'" ([^<&'] | Reference)* "'"
2984 *
2985 * 3.3.3 Attribute-Value Normalization:
2986 * Before the value of an attribute is passed to the application or
2987 * checked for validity, the XML processor must normalize it as follows:
2988 * - a character reference is processed by appending the referenced
2989 * character to the attribute value
2990 * - an entity reference is processed by recursively processing the
2991 * replacement text of the entity
2992 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2993 * appending #x20 to the normalized value, except that only a single
2994 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2995 * parsed entity or the literal entity value of an internal parsed entity
2996 * - other characters are processed by appending them to the normalized value
2997 * If the declared value is not CDATA, then the XML processor must further
2998 * process the normalized attribute value by discarding any leading and
2999 * trailing space (#x20) characters, and by replacing sequences of space
3000 * (#x20) characters by a single space (#x20) character.
3001 * All attributes for which no declaration has been read should be treated
3002 * by a non-validating parser as if declared CDATA.
3003 *
3004 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3005 */
3006
3007
3008xmlChar *
3009xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003010 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003011 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003012}
3013
3014/**
Owen Taylor3473f882001-02-23 17:55:21 +00003015 * xmlParseSystemLiteral:
3016 * @ctxt: an XML parser context
3017 *
3018 * parse an XML Literal
3019 *
3020 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3021 *
3022 * Returns the SystemLiteral parsed or NULL
3023 */
3024
3025xmlChar *
3026xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3027 xmlChar *buf = NULL;
3028 int len = 0;
3029 int size = XML_PARSER_BUFFER_SIZE;
3030 int cur, l;
3031 xmlChar stop;
3032 int state = ctxt->instate;
3033 int count = 0;
3034
3035 SHRINK;
3036 if (RAW == '"') {
3037 NEXT;
3038 stop = '"';
3039 } else if (RAW == '\'') {
3040 NEXT;
3041 stop = '\'';
3042 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003043 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003044 return(NULL);
3045 }
3046
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003050 return(NULL);
3051 }
3052 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3053 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003054 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003055 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003056 xmlChar *tmp;
3057
Owen Taylor3473f882001-02-23 17:55:21 +00003058 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003059 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3060 if (tmp == NULL) {
3061 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003063 ctxt->instate = (xmlParserInputState) state;
3064 return(NULL);
3065 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003066 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003067 }
3068 count++;
3069 if (count > 50) {
3070 GROW;
3071 count = 0;
3072 }
3073 COPY_BUF(l,buf,len,cur);
3074 NEXTL(l);
3075 cur = CUR_CHAR(l);
3076 if (cur == 0) {
3077 GROW;
3078 SHRINK;
3079 cur = CUR_CHAR(l);
3080 }
3081 }
3082 buf[len] = 0;
3083 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003084 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003085 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003086 } else {
3087 NEXT;
3088 }
3089 return(buf);
3090}
3091
3092/**
3093 * xmlParsePubidLiteral:
3094 * @ctxt: an XML parser context
3095 *
3096 * parse an XML public literal
3097 *
3098 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3099 *
3100 * Returns the PubidLiteral parsed or NULL.
3101 */
3102
3103xmlChar *
3104xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3105 xmlChar *buf = NULL;
3106 int len = 0;
3107 int size = XML_PARSER_BUFFER_SIZE;
3108 xmlChar cur;
3109 xmlChar stop;
3110 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003111 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003112
3113 SHRINK;
3114 if (RAW == '"') {
3115 NEXT;
3116 stop = '"';
3117 } else if (RAW == '\'') {
3118 NEXT;
3119 stop = '\'';
3120 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003121 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003122 return(NULL);
3123 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003124 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003125 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003126 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003127 return(NULL);
3128 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003129 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003130 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003131 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003132 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003133 xmlChar *tmp;
3134
Owen Taylor3473f882001-02-23 17:55:21 +00003135 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003136 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3137 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003138 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003139 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003140 return(NULL);
3141 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003142 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003143 }
3144 buf[len++] = cur;
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 NEXT;
3151 cur = CUR;
3152 if (cur == 0) {
3153 GROW;
3154 SHRINK;
3155 cur = CUR;
3156 }
3157 }
3158 buf[len] = 0;
3159 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003160 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003161 } else {
3162 NEXT;
3163 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003164 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 return(buf);
3166}
3167
Daniel Veillard48b2f892001-02-25 16:11:03 +00003168void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003169/**
3170 * xmlParseCharData:
3171 * @ctxt: an XML parser context
3172 * @cdata: int indicating whether we are within a CDATA section
3173 *
3174 * parse a CharData section.
3175 * if we are within a CDATA section ']]>' marks an end of section.
3176 *
3177 * The right angle bracket (>) may be represented using the string "&gt;",
3178 * and must, for compatibility, be escaped using "&gt;" or a character
3179 * reference when it appears in the string "]]>" in content, when that
3180 * string is not marking the end of a CDATA section.
3181 *
3182 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3183 */
3184
3185void
3186xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003187 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003188 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003189 int line = ctxt->input->line;
3190 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003191
3192 SHRINK;
3193 GROW;
3194 /*
3195 * Accelerated common case where input don't need to be
3196 * modified before passing it to the handler.
3197 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003198 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003199 in = ctxt->input->cur;
3200 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003201get_more_space:
3202 while (*in == 0x20) in++;
3203 if (*in == 0xA) {
3204 ctxt->input->line++;
3205 in++;
3206 while (*in == 0xA) {
3207 ctxt->input->line++;
3208 in++;
3209 }
3210 goto get_more_space;
3211 }
3212 if (*in == '<') {
3213 nbchar = in - ctxt->input->cur;
3214 if (nbchar > 0) {
3215 const xmlChar *tmp = ctxt->input->cur;
3216 ctxt->input->cur = in;
3217
Daniel Veillard34099b42004-11-04 17:34:35 +00003218 if ((ctxt->sax != NULL) &&
3219 (ctxt->sax->ignorableWhitespace !=
3220 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003221 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3222 ctxt->sax->ignorableWhitespace(ctxt->userData,
3223 tmp, nbchar);
3224 } else if (ctxt->sax->characters != NULL)
3225 ctxt->sax->characters(ctxt->userData,
3226 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003227 } else if ((ctxt->sax != NULL) &&
3228 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003229 ctxt->sax->characters(ctxt->userData,
3230 tmp, nbchar);
3231 }
3232 }
3233 return;
3234 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003235get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003236 while (((*in > ']') && (*in <= 0x7F)) ||
3237 ((*in > '&') && (*in < '<')) ||
3238 ((*in > '<') && (*in < ']')) ||
3239 ((*in >= 0x20) && (*in < '&')) ||
3240 (*in == 0x09))
3241 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003242 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003243 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003244 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003245 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003246 ctxt->input->line++;
3247 in++;
3248 }
3249 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003250 }
3251 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003252 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003253 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003254 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003255 return;
3256 }
3257 in++;
3258 goto get_more;
3259 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003260 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003261 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003262 if ((ctxt->sax != NULL) &&
3263 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003264 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003265 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003266 const xmlChar *tmp = ctxt->input->cur;
3267 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003268
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003269 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003270 ctxt->sax->ignorableWhitespace(ctxt->userData,
3271 tmp, nbchar);
3272 } else if (ctxt->sax->characters != NULL)
3273 ctxt->sax->characters(ctxt->userData,
3274 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003275 line = ctxt->input->line;
3276 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003277 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003278 if (ctxt->sax->characters != NULL)
3279 ctxt->sax->characters(ctxt->userData,
3280 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003281 line = ctxt->input->line;
3282 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003283 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003284 }
3285 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003286 if (*in == 0xD) {
3287 in++;
3288 if (*in == 0xA) {
3289 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003290 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003291 ctxt->input->line++;
3292 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003293 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003294 in--;
3295 }
3296 if (*in == '<') {
3297 return;
3298 }
3299 if (*in == '&') {
3300 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003301 }
3302 SHRINK;
3303 GROW;
3304 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003305 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003306 nbchar = 0;
3307 }
Daniel Veillard50582112001-03-26 22:52:16 +00003308 ctxt->input->line = line;
3309 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 xmlParseCharDataComplex(ctxt, cdata);
3311}
3312
Daniel Veillard01c13b52002-12-10 15:19:08 +00003313/**
3314 * xmlParseCharDataComplex:
3315 * @ctxt: an XML parser context
3316 * @cdata: int indicating whether we are within a CDATA section
3317 *
3318 * parse a CharData section.this is the fallback function
3319 * of xmlParseCharData() when the parsing requires handling
3320 * of non-ASCII characters.
3321 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003322void
3323xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003324 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3325 int nbchar = 0;
3326 int cur, l;
3327 int count = 0;
3328
3329 SHRINK;
3330 GROW;
3331 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003332 while ((cur != '<') && /* checked */
3333 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003334 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 if ((cur == ']') && (NXT(1) == ']') &&
3336 (NXT(2) == '>')) {
3337 if (cdata) break;
3338 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003339 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003340 }
3341 }
3342 COPY_BUF(l,buf,nbchar,cur);
3343 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003344 buf[nbchar] = 0;
3345
Owen Taylor3473f882001-02-23 17:55:21 +00003346 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003347 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003348 */
3349 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003350 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003351 if (ctxt->sax->ignorableWhitespace != NULL)
3352 ctxt->sax->ignorableWhitespace(ctxt->userData,
3353 buf, nbchar);
3354 } else {
3355 if (ctxt->sax->characters != NULL)
3356 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3357 }
3358 }
3359 nbchar = 0;
3360 }
3361 count++;
3362 if (count > 50) {
3363 GROW;
3364 count = 0;
3365 }
3366 NEXTL(l);
3367 cur = CUR_CHAR(l);
3368 }
3369 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003370 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003371 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003372 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003373 */
3374 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003375 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003376 if (ctxt->sax->ignorableWhitespace != NULL)
3377 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3378 } else {
3379 if (ctxt->sax->characters != NULL)
3380 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3381 }
3382 }
3383 }
3384}
3385
3386/**
3387 * xmlParseExternalID:
3388 * @ctxt: an XML parser context
3389 * @publicID: a xmlChar** receiving PubidLiteral
3390 * @strict: indicate whether we should restrict parsing to only
3391 * production [75], see NOTE below
3392 *
3393 * Parse an External ID or a Public ID
3394 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003395 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003396 * 'PUBLIC' S PubidLiteral S SystemLiteral
3397 *
3398 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3399 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3400 *
3401 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3402 *
3403 * Returns the function returns SystemLiteral and in the second
3404 * case publicID receives PubidLiteral, is strict is off
3405 * it is possible to return NULL and have publicID set.
3406 */
3407
3408xmlChar *
3409xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3410 xmlChar *URI = NULL;
3411
3412 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003413
3414 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003415 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003416 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003417 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003418 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3419 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003420 }
3421 SKIP_BLANKS;
3422 URI = xmlParseSystemLiteral(ctxt);
3423 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003424 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003425 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003426 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003427 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003428 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003429 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003430 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003431 }
3432 SKIP_BLANKS;
3433 *publicID = xmlParsePubidLiteral(ctxt);
3434 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003435 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003436 }
3437 if (strict) {
3438 /*
3439 * We don't handle [83] so "S SystemLiteral" is required.
3440 */
William M. Brack76e95df2003-10-18 16:20:14 +00003441 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003443 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 } else {
3446 /*
3447 * We handle [83] so we return immediately, if
3448 * "S SystemLiteral" is not detected. From a purely parsing
3449 * point of view that's a nice mess.
3450 */
3451 const xmlChar *ptr;
3452 GROW;
3453
3454 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003455 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003456
William M. Brack76e95df2003-10-18 16:20:14 +00003457 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003458 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3459 }
3460 SKIP_BLANKS;
3461 URI = xmlParseSystemLiteral(ctxt);
3462 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003463 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003464 }
3465 }
3466 return(URI);
3467}
3468
3469/**
3470 * xmlParseComment:
3471 * @ctxt: an XML parser context
3472 *
3473 * Skip an XML (SGML) comment <!-- .... -->
3474 * The spec says that "For compatibility, the string "--" (double-hyphen)
3475 * must not occur within comments. "
3476 *
3477 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3478 */
3479void
3480xmlParseComment(xmlParserCtxtPtr ctxt) {
3481 xmlChar *buf = NULL;
3482 int len;
3483 int size = XML_PARSER_BUFFER_SIZE;
3484 int q, ql;
3485 int r, rl;
3486 int cur, l;
3487 xmlParserInputState state;
3488 xmlParserInputPtr input = ctxt->input;
3489 int count = 0;
3490
3491 /*
3492 * Check that there is a comment right here.
3493 */
3494 if ((RAW != '<') || (NXT(1) != '!') ||
3495 (NXT(2) != '-') || (NXT(3) != '-')) return;
3496
3497 state = ctxt->instate;
3498 ctxt->instate = XML_PARSER_COMMENT;
3499 SHRINK;
3500 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003501 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003502 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003503 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003504 ctxt->instate = state;
3505 return;
3506 }
3507 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003508 if (q == 0)
3509 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003510 NEXTL(ql);
3511 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003512 if (r == 0)
3513 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 NEXTL(rl);
3515 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003516 if (cur == 0)
3517 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003518 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003519 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003520 ((cur != '>') ||
3521 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003522 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003523 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003524 }
3525 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003526 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003527 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003528 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3529 if (new_buf == NULL) {
3530 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003531 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003532 ctxt->instate = state;
3533 return;
3534 }
William M. Bracka3215c72004-07-31 16:24:01 +00003535 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003536 }
3537 COPY_BUF(ql,buf,len,q);
3538 q = r;
3539 ql = rl;
3540 r = cur;
3541 rl = l;
3542
3543 count++;
3544 if (count > 50) {
3545 GROW;
3546 count = 0;
3547 }
3548 NEXTL(l);
3549 cur = CUR_CHAR(l);
3550 if (cur == 0) {
3551 SHRINK;
3552 GROW;
3553 cur = CUR_CHAR(l);
3554 }
3555 }
3556 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003557 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003558 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003559 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 xmlFree(buf);
3561 } else {
3562 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003563 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3564 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003565 }
3566 NEXT;
3567 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3568 (!ctxt->disableSAX))
3569 ctxt->sax->comment(ctxt->userData, buf);
3570 xmlFree(buf);
3571 }
3572 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003573 return;
3574not_terminated:
3575 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3576 "Comment not terminated\n", NULL);
3577 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003578}
3579
3580/**
3581 * xmlParsePITarget:
3582 * @ctxt: an XML parser context
3583 *
3584 * parse the name of a PI
3585 *
3586 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3587 *
3588 * Returns the PITarget name or NULL
3589 */
3590
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003591const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003592xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003593 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003594
3595 name = xmlParseName(ctxt);
3596 if ((name != NULL) &&
3597 ((name[0] == 'x') || (name[0] == 'X')) &&
3598 ((name[1] == 'm') || (name[1] == 'M')) &&
3599 ((name[2] == 'l') || (name[2] == 'L'))) {
3600 int i;
3601 if ((name[0] == 'x') && (name[1] == 'm') &&
3602 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003603 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003604 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003605 return(name);
3606 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003607 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003608 return(name);
3609 }
3610 for (i = 0;;i++) {
3611 if (xmlW3CPIs[i] == NULL) break;
3612 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3613 return(name);
3614 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003615 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3616 "xmlParsePITarget: invalid name prefix 'xml'\n",
3617 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003618 }
3619 return(name);
3620}
3621
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected value is: catalog, '=', a quoted URL, each part being
 * optionally surrounded by blanks, and nothing else. A value where
 * "catalog" is not followed by '=' is ignored silently; any other
 * deviation raises the XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    /* the pseudo attribute name */
    for (p = catalog; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto syntax_error;
    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=') {
        return;
    }

    /* the opening quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;

    /* the URL runs up to the matching quote */
    start = ++p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    URL = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3683
Owen Taylor3473f882001-02-23 17:55:21 +00003684/**
3685 * xmlParsePI:
3686 * @ctxt: an XML parser context
3687 *
3688 * parse an XML Processing Instruction.
3689 *
3690 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3691 *
3692 * The processing is transfered to SAX once parsed.
3693 */
3694
3695void
3696xmlParsePI(xmlParserCtxtPtr ctxt) {
3697 xmlChar *buf = NULL;
3698 int len = 0;
3699 int size = XML_PARSER_BUFFER_SIZE;
3700 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003701 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003702 xmlParserInputState state;
3703 int count = 0;
3704
3705 if ((RAW == '<') && (NXT(1) == '?')) {
3706 xmlParserInputPtr input = ctxt->input;
3707 state = ctxt->instate;
3708 ctxt->instate = XML_PARSER_PI;
3709 /*
3710 * this is a Processing Instruction.
3711 */
3712 SKIP(2);
3713 SHRINK;
3714
3715 /*
3716 * Parse the target name and check for special support like
3717 * namespace.
3718 */
3719 target = xmlParsePITarget(ctxt);
3720 if (target != NULL) {
3721 if ((RAW == '?') && (NXT(1) == '>')) {
3722 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003723 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3724 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 SKIP(2);
3727
3728 /*
3729 * SAX: PI detected.
3730 */
3731 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3732 (ctxt->sax->processingInstruction != NULL))
3733 ctxt->sax->processingInstruction(ctxt->userData,
3734 target, NULL);
3735 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003736 return;
3737 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003738 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003739 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003740 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003741 ctxt->instate = state;
3742 return;
3743 }
3744 cur = CUR;
3745 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003746 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3747 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003748 }
3749 SKIP_BLANKS;
3750 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003751 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003752 ((cur != '?') || (NXT(1) != '>'))) {
3753 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003754 xmlChar *tmp;
3755
Owen Taylor3473f882001-02-23 17:55:21 +00003756 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003757 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3758 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003759 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003760 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 ctxt->instate = state;
3762 return;
3763 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003764 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003765 }
3766 count++;
3767 if (count > 50) {
3768 GROW;
3769 count = 0;
3770 }
3771 COPY_BUF(l,buf,len,cur);
3772 NEXTL(l);
3773 cur = CUR_CHAR(l);
3774 if (cur == 0) {
3775 SHRINK;
3776 GROW;
3777 cur = CUR_CHAR(l);
3778 }
3779 }
3780 buf[len] = 0;
3781 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003782 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3783 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 } else {
3785 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3787 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003788 }
3789 SKIP(2);
3790
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003791#ifdef LIBXML_CATALOG_ENABLED
3792 if (((state == XML_PARSER_MISC) ||
3793 (state == XML_PARSER_START)) &&
3794 (xmlStrEqual(target, XML_CATALOG_PI))) {
3795 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3796 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3797 (allow == XML_CATA_ALLOW_ALL))
3798 xmlParseCatalogPI(ctxt, buf);
3799 }
3800#endif
3801
3802
Owen Taylor3473f882001-02-23 17:55:21 +00003803 /*
3804 * SAX: PI detected.
3805 */
3806 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3807 (ctxt->sax->processingInstruction != NULL))
3808 ctxt->sax->processingInstruction(ctxt->userData,
3809 target, buf);
3810 }
3811 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003812 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003813 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 ctxt->instate = state;
3816 }
3817}
3818
3819/**
3820 * xmlParseNotationDecl:
3821 * @ctxt: an XML parser context
3822 *
3823 * parse a notation declaration
3824 *
3825 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3826 *
3827 * Hence there is actually 3 choices:
3828 * 'PUBLIC' S PubidLiteral
3829 * 'PUBLIC' S PubidLiteral S SystemLiteral
3830 * and 'SYSTEM' S SystemLiteral
3831 *
3832 * See the NOTE on xmlParseExternalID().
3833 */
3834
3835void
3836xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003837 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003838 xmlChar *Pubid;
3839 xmlChar *Systemid;
3840
Daniel Veillarda07050d2003-10-19 14:46:32 +00003841 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003842 xmlParserInputPtr input = ctxt->input;
3843 SHRINK;
3844 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003845 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003846 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3847 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003848 return;
3849 }
3850 SKIP_BLANKS;
3851
Daniel Veillard76d66f42001-05-16 21:05:17 +00003852 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003853 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003854 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003855 return;
3856 }
William M. Brack76e95df2003-10-18 16:20:14 +00003857 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003858 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003859 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003860 return;
3861 }
3862 SKIP_BLANKS;
3863
3864 /*
3865 * Parse the IDs.
3866 */
3867 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3868 SKIP_BLANKS;
3869
3870 if (RAW == '>') {
3871 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003872 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3873 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003874 }
3875 NEXT;
3876 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3877 (ctxt->sax->notationDecl != NULL))
3878 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3879 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003880 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003881 }
Owen Taylor3473f882001-02-23 17:55:21 +00003882 if (Systemid != NULL) xmlFree(Systemid);
3883 if (Pubid != NULL) xmlFree(Pubid);
3884 }
3885}
3886
3887/**
3888 * xmlParseEntityDecl:
3889 * @ctxt: an XML parser context
3890 *
3891 * parse <!ENTITY declarations
3892 *
3893 * [70] EntityDecl ::= GEDecl | PEDecl
3894 *
3895 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3896 *
3897 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3898 *
3899 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3900 *
3901 * [74] PEDef ::= EntityValue | ExternalID
3902 *
3903 * [76] NDataDecl ::= S 'NDATA' S Name
3904 *
3905 * [ VC: Notation Declared ]
3906 * The Name must match the declared name of a notation.
3907 */
3908
3909void
3910xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003911 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003912 xmlChar *value = NULL;
3913 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003914 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003915 int isParameter = 0;
3916 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003917 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003918
3919 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003920 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003921 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003922 SHRINK;
3923 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003924 skipped = SKIP_BLANKS;
3925 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003926 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3927 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003928 }
Owen Taylor3473f882001-02-23 17:55:21 +00003929
3930 if (RAW == '%') {
3931 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003932 skipped = SKIP_BLANKS;
3933 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003934 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3935 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003936 }
Owen Taylor3473f882001-02-23 17:55:21 +00003937 isParameter = 1;
3938 }
3939
Daniel Veillard76d66f42001-05-16 21:05:17 +00003940 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003941 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003942 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3943 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003944 return;
3945 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003946 skipped = SKIP_BLANKS;
3947 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003948 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3949 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003950 }
Owen Taylor3473f882001-02-23 17:55:21 +00003951
Daniel Veillardf5582f12002-06-11 10:08:16 +00003952 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003953 /*
3954 * handle the various case of definitions...
3955 */
3956 if (isParameter) {
3957 if ((RAW == '"') || (RAW == '\'')) {
3958 value = xmlParseEntityValue(ctxt, &orig);
3959 if (value) {
3960 if ((ctxt->sax != NULL) &&
3961 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3962 ctxt->sax->entityDecl(ctxt->userData, name,
3963 XML_INTERNAL_PARAMETER_ENTITY,
3964 NULL, NULL, value);
3965 }
3966 } else {
3967 URI = xmlParseExternalID(ctxt, &literal, 1);
3968 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003969 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 }
3971 if (URI) {
3972 xmlURIPtr uri;
3973
3974 uri = xmlParseURI((const char *) URI);
3975 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003976 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3977 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003978 /*
3979 * This really ought to be a well formedness error
3980 * but the XML Core WG decided otherwise c.f. issue
3981 * E26 of the XML erratas.
3982 */
Owen Taylor3473f882001-02-23 17:55:21 +00003983 } else {
3984 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003985 /*
3986 * Okay this is foolish to block those but not
3987 * invalid URIs.
3988 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003989 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003990 } else {
3991 if ((ctxt->sax != NULL) &&
3992 (!ctxt->disableSAX) &&
3993 (ctxt->sax->entityDecl != NULL))
3994 ctxt->sax->entityDecl(ctxt->userData, name,
3995 XML_EXTERNAL_PARAMETER_ENTITY,
3996 literal, URI, NULL);
3997 }
3998 xmlFreeURI(uri);
3999 }
4000 }
4001 }
4002 } else {
4003 if ((RAW == '"') || (RAW == '\'')) {
4004 value = xmlParseEntityValue(ctxt, &orig);
4005 if ((ctxt->sax != NULL) &&
4006 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4007 ctxt->sax->entityDecl(ctxt->userData, name,
4008 XML_INTERNAL_GENERAL_ENTITY,
4009 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004010 /*
4011 * For expat compatibility in SAX mode.
4012 */
4013 if ((ctxt->myDoc == NULL) ||
4014 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4015 if (ctxt->myDoc == NULL) {
4016 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4017 }
4018 if (ctxt->myDoc->intSubset == NULL)
4019 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4020 BAD_CAST "fake", NULL, NULL);
4021
Daniel Veillard1af9a412003-08-20 22:54:39 +00004022 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4023 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004024 }
Owen Taylor3473f882001-02-23 17:55:21 +00004025 } else {
4026 URI = xmlParseExternalID(ctxt, &literal, 1);
4027 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004028 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004029 }
4030 if (URI) {
4031 xmlURIPtr uri;
4032
4033 uri = xmlParseURI((const char *)URI);
4034 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004035 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4036 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004037 /*
4038 * This really ought to be a well formedness error
4039 * but the XML Core WG decided otherwise c.f. issue
4040 * E26 of the XML erratas.
4041 */
Owen Taylor3473f882001-02-23 17:55:21 +00004042 } else {
4043 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004044 /*
4045 * Okay this is foolish to block those but not
4046 * invalid URIs.
4047 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004048 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 }
4050 xmlFreeURI(uri);
4051 }
4052 }
William M. Brack76e95df2003-10-18 16:20:14 +00004053 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004054 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4055 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004056 }
4057 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004058 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004059 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004060 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4062 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004063 }
4064 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004065 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004066 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4067 (ctxt->sax->unparsedEntityDecl != NULL))
4068 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4069 literal, URI, ndata);
4070 } else {
4071 if ((ctxt->sax != NULL) &&
4072 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4073 ctxt->sax->entityDecl(ctxt->userData, name,
4074 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4075 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004076 /*
4077 * For expat compatibility in SAX mode.
4078 * assuming the entity repalcement was asked for
4079 */
4080 if ((ctxt->replaceEntities != 0) &&
4081 ((ctxt->myDoc == NULL) ||
4082 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4083 if (ctxt->myDoc == NULL) {
4084 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4085 }
4086
4087 if (ctxt->myDoc->intSubset == NULL)
4088 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4089 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004090 xmlSAX2EntityDecl(ctxt, name,
4091 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4092 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004093 }
Owen Taylor3473f882001-02-23 17:55:21 +00004094 }
4095 }
4096 }
4097 SKIP_BLANKS;
4098 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004099 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004100 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004101 } else {
4102 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004103 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4104 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004105 }
4106 NEXT;
4107 }
4108 if (orig != NULL) {
4109 /*
4110 * Ugly mechanism to save the raw entity value.
4111 */
4112 xmlEntityPtr cur = NULL;
4113
4114 if (isParameter) {
4115 if ((ctxt->sax != NULL) &&
4116 (ctxt->sax->getParameterEntity != NULL))
4117 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4118 } else {
4119 if ((ctxt->sax != NULL) &&
4120 (ctxt->sax->getEntity != NULL))
4121 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004122 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004123 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004124 }
Owen Taylor3473f882001-02-23 17:55:21 +00004125 }
4126 if (cur != NULL) {
4127 if (cur->orig != NULL)
4128 xmlFree(orig);
4129 else
4130 cur->orig = orig;
4131 } else
4132 xmlFree(orig);
4133 }
Owen Taylor3473f882001-02-23 17:55:21 +00004134 if (value != NULL) xmlFree(value);
4135 if (URI != NULL) xmlFree(URI);
4136 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004137 }
4138}
4139
4140/**
4141 * xmlParseDefaultDecl:
4142 * @ctxt: an XML parser context
4143 * @value: Receive a possible fixed default value for the attribute
4144 *
4145 * Parse an attribute default declaration
4146 *
4147 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4148 *
4149 * [ VC: Required Attribute ]
4150 * if the default declaration is the keyword #REQUIRED, then the
4151 * attribute must be specified for all elements of the type in the
4152 * attribute-list declaration.
4153 *
4154 * [ VC: Attribute Default Legal ]
4155 * The declared default value must meet the lexical constraints of
4156 * the declared attribute type c.f. xmlValidateAttributeDecl()
4157 *
4158 * [ VC: Fixed Attribute Default ]
4159 * if an attribute has a default value declared with the #FIXED
4160 * keyword, instances of that attribute must match the default value.
4161 *
4162 * [ WFC: No < in Attribute Values ]
4163 * handled in xmlParseAttValue()
4164 *
4165 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4166 * or XML_ATTRIBUTE_FIXED.
4167 */
4168
4169int
4170xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4171 int val;
4172 xmlChar *ret;
4173
4174 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004175 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004176 SKIP(9);
4177 return(XML_ATTRIBUTE_REQUIRED);
4178 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004179 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004180 SKIP(8);
4181 return(XML_ATTRIBUTE_IMPLIED);
4182 }
4183 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004184 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004185 SKIP(6);
4186 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004187 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004188 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4189 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004190 }
4191 SKIP_BLANKS;
4192 }
4193 ret = xmlParseAttValue(ctxt);
4194 ctxt->instate = XML_PARSER_DTD;
4195 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004196 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004197 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004198 } else
4199 *value = ret;
4200 return(val);
4201}
4202
4203/**
4204 * xmlParseNotationType:
4205 * @ctxt: an XML parser context
4206 *
4207 * parse an Notation attribute type.
4208 *
4209 * Note: the leading 'NOTATION' S part has already being parsed...
4210 *
4211 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4212 *
4213 * [ VC: Notation Attributes ]
4214 * Values of this type must match one of the notation names included
4215 * in the declaration; all notation names in the declaration must be declared.
4216 *
4217 * Returns: the notation attribute tree built while parsing
4218 */
4219
4220xmlEnumerationPtr
4221xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004222 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4224
4225 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004226 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004227 return(NULL);
4228 }
4229 SHRINK;
4230 do {
4231 NEXT;
4232 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004233 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004234 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004235 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4236 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004237 return(ret);
4238 }
4239 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 if (cur == NULL) return(ret);
4241 if (last == NULL) ret = last = cur;
4242 else {
4243 last->next = cur;
4244 last = cur;
4245 }
4246 SKIP_BLANKS;
4247 } while (RAW == '|');
4248 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004249 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004250 if ((last != NULL) && (last != ret))
4251 xmlFreeEnumeration(last);
4252 return(ret);
4253 }
4254 NEXT;
4255 return(ret);
4256}
4257
4258/**
4259 * xmlParseEnumerationType:
4260 * @ctxt: an XML parser context
4261 *
4262 * parse an Enumeration attribute type.
4263 *
4264 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4265 *
4266 * [ VC: Enumeration ]
4267 * Values of this type must match one of the Nmtoken tokens in
4268 * the declaration
4269 *
4270 * Returns: the enumeration attribute tree built while parsing
4271 */
4272
4273xmlEnumerationPtr
4274xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4275 xmlChar *name;
4276 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4277
4278 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004279 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004280 return(NULL);
4281 }
4282 SHRINK;
4283 do {
4284 NEXT;
4285 SKIP_BLANKS;
4286 name = xmlParseNmtoken(ctxt);
4287 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004288 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 return(ret);
4290 }
4291 cur = xmlCreateEnumeration(name);
4292 xmlFree(name);
4293 if (cur == NULL) return(ret);
4294 if (last == NULL) ret = last = cur;
4295 else {
4296 last->next = cur;
4297 last = cur;
4298 }
4299 SKIP_BLANKS;
4300 } while (RAW == '|');
4301 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004302 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004303 return(ret);
4304 }
4305 NEXT;
4306 return(ret);
4307}
4308
4309/**
4310 * xmlParseEnumeratedType:
4311 * @ctxt: an XML parser context
4312 * @tree: the enumeration tree built while parsing
4313 *
4314 * parse an Enumerated attribute type.
4315 *
4316 * [57] EnumeratedType ::= NotationType | Enumeration
4317 *
4318 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4319 *
4320 *
4321 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4322 */
4323
4324int
4325xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004326 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004327 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004328 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4330 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004331 return(0);
4332 }
4333 SKIP_BLANKS;
4334 *tree = xmlParseNotationType(ctxt);
4335 if (*tree == NULL) return(0);
4336 return(XML_ATTRIBUTE_NOTATION);
4337 }
4338 *tree = xmlParseEnumerationType(ctxt);
4339 if (*tree == NULL) return(0);
4340 return(XML_ATTRIBUTE_ENUMERATION);
4341}
4342
4343/**
4344 * xmlParseAttributeType:
4345 * @ctxt: an XML parser context
4346 * @tree: the enumeration tree built while parsing
4347 *
4348 * parse the Attribute list def for an element
4349 *
4350 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4351 *
4352 * [55] StringType ::= 'CDATA'
4353 *
4354 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4355 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4356 *
4357 * Validity constraints for attribute values syntax are checked in
4358 * xmlValidateAttributeValue()
4359 *
4360 * [ VC: ID ]
4361 * Values of type ID must match the Name production. A name must not
4362 * appear more than once in an XML document as a value of this type;
4363 * i.e., ID values must uniquely identify the elements which bear them.
4364 *
4365 * [ VC: One ID per Element Type ]
4366 * No element type may have more than one ID attribute specified.
4367 *
4368 * [ VC: ID Attribute Default ]
4369 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4370 *
4371 * [ VC: IDREF ]
4372 * Values of type IDREF must match the Name production, and values
4373 * of type IDREFS must match Names; each IDREF Name must match the value
4374 * of an ID attribute on some element in the XML document; i.e. IDREF
4375 * values must match the value of some ID attribute.
4376 *
4377 * [ VC: Entity Name ]
4378 * Values of type ENTITY must match the Name production, values
4379 * of type ENTITIES must match Names; each Entity Name must match the
4380 * name of an unparsed entity declared in the DTD.
4381 *
4382 * [ VC: Name Token ]
4383 * Values of type NMTOKEN must match the Nmtoken production; values
4384 * of type NMTOKENS must match Nmtokens.
4385 *
4386 * Returns the attribute type
4387 */
4388int
4389xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4390 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004391 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004392 SKIP(5);
4393 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004394 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004395 SKIP(6);
4396 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004397 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004398 SKIP(5);
4399 return(XML_ATTRIBUTE_IDREF);
4400 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4401 SKIP(2);
4402 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004403 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004404 SKIP(6);
4405 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004406 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004407 SKIP(8);
4408 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004409 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004410 SKIP(8);
4411 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004412 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004413 SKIP(7);
4414 return(XML_ATTRIBUTE_NMTOKEN);
4415 }
4416 return(xmlParseEnumeratedType(ctxt, tree));
4417}
4418
4419/**
4420 * xmlParseAttributeListDecl:
4421 * @ctxt: an XML parser context
4422 *
4423 * : parse the Attribute list def for an element
4424 *
4425 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4426 *
4427 * [53] AttDef ::= S Name S AttType S DefaultDecl
4428 *
4429 */
4430void
4431xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004432 const xmlChar *elemName;
4433 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 xmlEnumerationPtr tree;
4435
Daniel Veillarda07050d2003-10-19 14:46:32 +00004436 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004437 xmlParserInputPtr input = ctxt->input;
4438
4439 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004440 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004442 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004445 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004446 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004447 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4448 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004449 return;
4450 }
4451 SKIP_BLANKS;
4452 GROW;
4453 while (RAW != '>') {
4454 const xmlChar *check = CUR_PTR;
4455 int type;
4456 int def;
4457 xmlChar *defaultValue = NULL;
4458
4459 GROW;
4460 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004461 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004462 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004463 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4464 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004465 break;
4466 }
4467 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004468 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004469 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004470 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004471 if (defaultValue != NULL)
4472 xmlFree(defaultValue);
4473 break;
4474 }
4475 SKIP_BLANKS;
4476
4477 type = xmlParseAttributeType(ctxt, &tree);
4478 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004479 if (defaultValue != NULL)
4480 xmlFree(defaultValue);
4481 break;
4482 }
4483
4484 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004485 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004486 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4487 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004488 if (defaultValue != NULL)
4489 xmlFree(defaultValue);
4490 if (tree != NULL)
4491 xmlFreeEnumeration(tree);
4492 break;
4493 }
4494 SKIP_BLANKS;
4495
4496 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4497 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004498 if (defaultValue != NULL)
4499 xmlFree(defaultValue);
4500 if (tree != NULL)
4501 xmlFreeEnumeration(tree);
4502 break;
4503 }
4504
4505 GROW;
4506 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004507 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004508 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004509 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004510 if (defaultValue != NULL)
4511 xmlFree(defaultValue);
4512 if (tree != NULL)
4513 xmlFreeEnumeration(tree);
4514 break;
4515 }
4516 SKIP_BLANKS;
4517 }
4518 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004519 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4520 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004521 if (defaultValue != NULL)
4522 xmlFree(defaultValue);
4523 if (tree != NULL)
4524 xmlFreeEnumeration(tree);
4525 break;
4526 }
4527 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4528 (ctxt->sax->attributeDecl != NULL))
4529 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4530 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004531 else if (tree != NULL)
4532 xmlFreeEnumeration(tree);
4533
4534 if ((ctxt->sax2) && (defaultValue != NULL) &&
4535 (def != XML_ATTRIBUTE_IMPLIED) &&
4536 (def != XML_ATTRIBUTE_REQUIRED)) {
4537 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4538 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004539 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4540 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4541 }
Owen Taylor3473f882001-02-23 17:55:21 +00004542 if (defaultValue != NULL)
4543 xmlFree(defaultValue);
4544 GROW;
4545 }
4546 if (RAW == '>') {
4547 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004548 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4549 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004550 }
4551 NEXT;
4552 }
Owen Taylor3473f882001-02-23 17:55:21 +00004553 }
4554}
4555
4556/**
4557 * xmlParseElementMixedContentDecl:
4558 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004559 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004560 *
4561 * parse the declaration for a Mixed Element content
4562 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4563 *
4564 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4565 * '(' S? '#PCDATA' S? ')'
4566 *
4567 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4568 *
4569 * [ VC: No Duplicate Types ]
4570 * The same name must not appear more than once in a single
4571 * mixed-content declaration.
4572 *
4573 * returns: the list of the xmlElementContentPtr describing the element choices
4574 */
4575xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004576xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004577 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004578 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004579
4580 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004581 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004582 SKIP(7);
4583 SKIP_BLANKS;
4584 SHRINK;
4585 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004586 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004587 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4588"Element content declaration doesn't start and stop in the same entity\n",
4589 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004590 }
Owen Taylor3473f882001-02-23 17:55:21 +00004591 NEXT;
4592 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4593 if (RAW == '*') {
4594 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4595 NEXT;
4596 }
4597 return(ret);
4598 }
4599 if ((RAW == '(') || (RAW == '|')) {
4600 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4601 if (ret == NULL) return(NULL);
4602 }
4603 while (RAW == '|') {
4604 NEXT;
4605 if (elem == NULL) {
4606 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4607 if (ret == NULL) return(NULL);
4608 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004609 if (cur != NULL)
4610 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 cur = ret;
4612 } else {
4613 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4614 if (n == NULL) return(NULL);
4615 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004616 if (n->c1 != NULL)
4617 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004618 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004619 if (n != NULL)
4620 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004621 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 }
4623 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004624 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004625 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004626 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004627 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004628 xmlFreeElementContent(cur);
4629 return(NULL);
4630 }
4631 SKIP_BLANKS;
4632 GROW;
4633 }
4634 if ((RAW == ')') && (NXT(1) == '*')) {
4635 if (elem != NULL) {
4636 cur->c2 = xmlNewElementContent(elem,
4637 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004638 if (cur->c2 != NULL)
4639 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004640 }
4641 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004642 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004643 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4644"Element content declaration doesn't start and stop in the same entity\n",
4645 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004646 }
Owen Taylor3473f882001-02-23 17:55:21 +00004647 SKIP(2);
4648 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004649 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004650 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004651 return(NULL);
4652 }
4653
4654 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004655 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004656 }
4657 return(ret);
4658}
4659
4660/**
4661 * xmlParseElementChildrenContentDecl:
4662 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004663 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004664 *
 * parse the declaration for an Element children content
4666 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4667 *
4668 *
4669 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4670 *
4671 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4672 *
4673 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4674 *
4675 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4676 *
4677 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4678 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004679 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004680 * opening or closing parentheses in a choice, seq, or Mixed
4681 * construct is contained in the replacement text for a parameter
4682 * entity, both must be contained in the same replacement text. For
4683 * interoperability, if a parameter-entity reference appears in a
4684 * choice, seq, or Mixed construct, its replacement text should not
4685 * be empty, and neither the first nor last non-blank character of
4686 * the replacement text should be a connector (| or ,).
4687 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004688 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004689 * hierarchy.
4690 */
4691xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004692xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004693 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004694 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004695 xmlChar type = 0;
4696
4697 SKIP_BLANKS;
4698 GROW;
4699 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004700 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004701
Owen Taylor3473f882001-02-23 17:55:21 +00004702 /* Recurse on first child */
4703 NEXT;
4704 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004705 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004706 SKIP_BLANKS;
4707 GROW;
4708 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004709 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004710 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004711 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004712 return(NULL);
4713 }
4714 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004715 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004716 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004717 return(NULL);
4718 }
Owen Taylor3473f882001-02-23 17:55:21 +00004719 GROW;
4720 if (RAW == '?') {
4721 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4722 NEXT;
4723 } else if (RAW == '*') {
4724 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4725 NEXT;
4726 } else if (RAW == '+') {
4727 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4728 NEXT;
4729 } else {
4730 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4731 }
Owen Taylor3473f882001-02-23 17:55:21 +00004732 GROW;
4733 }
4734 SKIP_BLANKS;
4735 SHRINK;
4736 while (RAW != ')') {
4737 /*
4738 * Each loop we parse one separator and one element.
4739 */
4740 if (RAW == ',') {
4741 if (type == 0) type = CUR;
4742
4743 /*
4744 * Detect "Name | Name , Name" error
4745 */
4746 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004747 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004748 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004749 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004750 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004751 xmlFreeElementContent(last);
4752 if (ret != NULL)
4753 xmlFreeElementContent(ret);
4754 return(NULL);
4755 }
4756 NEXT;
4757
4758 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4759 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004760 if ((last != NULL) && (last != ret))
4761 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004762 xmlFreeElementContent(ret);
4763 return(NULL);
4764 }
4765 if (last == NULL) {
4766 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004767 if (ret != NULL)
4768 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004769 ret = cur = op;
4770 } else {
4771 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004772 if (op != NULL)
4773 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004774 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004775 if (last != NULL)
4776 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004777 cur =op;
4778 last = NULL;
4779 }
4780 } else if (RAW == '|') {
4781 if (type == 0) type = CUR;
4782
4783 /*
4784 * Detect "Name , Name | Name" error
4785 */
4786 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004787 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004788 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004789 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004790 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004791 xmlFreeElementContent(last);
4792 if (ret != NULL)
4793 xmlFreeElementContent(ret);
4794 return(NULL);
4795 }
4796 NEXT;
4797
4798 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4799 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004800 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004801 xmlFreeElementContent(last);
4802 if (ret != NULL)
4803 xmlFreeElementContent(ret);
4804 return(NULL);
4805 }
4806 if (last == NULL) {
4807 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004808 if (ret != NULL)
4809 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004810 ret = cur = op;
4811 } else {
4812 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004813 if (op != NULL)
4814 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004815 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004816 if (last != NULL)
4817 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004818 cur =op;
4819 last = NULL;
4820 }
4821 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004822 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004823 if (ret != NULL)
4824 xmlFreeElementContent(ret);
4825 return(NULL);
4826 }
4827 GROW;
4828 SKIP_BLANKS;
4829 GROW;
4830 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004831 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004832 /* Recurse on second child */
4833 NEXT;
4834 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004835 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004836 SKIP_BLANKS;
4837 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004838 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004840 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004841 if (ret != NULL)
4842 xmlFreeElementContent(ret);
4843 return(NULL);
4844 }
4845 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004846 if (RAW == '?') {
4847 last->ocur = XML_ELEMENT_CONTENT_OPT;
4848 NEXT;
4849 } else if (RAW == '*') {
4850 last->ocur = XML_ELEMENT_CONTENT_MULT;
4851 NEXT;
4852 } else if (RAW == '+') {
4853 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4854 NEXT;
4855 } else {
4856 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4857 }
4858 }
4859 SKIP_BLANKS;
4860 GROW;
4861 }
4862 if ((cur != NULL) && (last != NULL)) {
4863 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004864 if (last != NULL)
4865 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004866 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004867 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004868 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4869"Element content declaration doesn't start and stop in the same entity\n",
4870 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004871 }
Owen Taylor3473f882001-02-23 17:55:21 +00004872 NEXT;
4873 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004874 if (ret != NULL) {
4875 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4876 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4877 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4878 else
4879 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4880 }
Owen Taylor3473f882001-02-23 17:55:21 +00004881 NEXT;
4882 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004883 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004884 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004885 cur = ret;
4886 /*
4887 * Some normalization:
4888 * (a | b* | c?)* == (a | b | c)*
4889 */
4890 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4891 if ((cur->c1 != NULL) &&
4892 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4893 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4894 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4895 if ((cur->c2 != NULL) &&
4896 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4897 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4898 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4899 cur = cur->c2;
4900 }
4901 }
Owen Taylor3473f882001-02-23 17:55:21 +00004902 NEXT;
4903 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004904 if (ret != NULL) {
4905 int found = 0;
4906
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004907 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4908 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4909 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004910 else
4911 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004912 /*
4913 * Some normalization:
4914 * (a | b*)+ == (a | b)*
4915 * (a | b?)+ == (a | b)*
4916 */
4917 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4918 if ((cur->c1 != NULL) &&
4919 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4920 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4921 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4922 found = 1;
4923 }
4924 if ((cur->c2 != NULL) &&
4925 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4926 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4927 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4928 found = 1;
4929 }
4930 cur = cur->c2;
4931 }
4932 if (found)
4933 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4934 }
Owen Taylor3473f882001-02-23 17:55:21 +00004935 NEXT;
4936 }
4937 return(ret);
4938}
4939
4940/**
4941 * xmlParseElementContentDecl:
4942 * @ctxt: an XML parser context
4943 * @name: the name of the element being defined.
4944 * @result: the Element Content pointer will be stored here if any
4945 *
4946 * parse the declaration for an Element content either Mixed or Children,
4947 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4948 *
4949 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4950 *
4951 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4952 */
4953
4954int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004955xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004956 xmlElementContentPtr *result) {
4957
4958 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004959 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004960 int res;
4961
4962 *result = NULL;
4963
4964 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004965 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004966 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004967 return(-1);
4968 }
4969 NEXT;
4970 GROW;
4971 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004972 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004973 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004974 res = XML_ELEMENT_TYPE_MIXED;
4975 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004976 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004977 res = XML_ELEMENT_TYPE_ELEMENT;
4978 }
Owen Taylor3473f882001-02-23 17:55:21 +00004979 SKIP_BLANKS;
4980 *result = tree;
4981 return(res);
4982}
4983
4984/**
4985 * xmlParseElementDecl:
4986 * @ctxt: an XML parser context
4987 *
4988 * parse an Element declaration.
4989 *
4990 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4991 *
4992 * [ VC: Unique Element Type Declaration ]
4993 * No element type may be declared more than once
4994 *
4995 * Returns the type of the element, or -1 in case of error
4996 */
4997int
4998xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004999 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005000 int ret = -1;
5001 xmlElementContentPtr content = NULL;
5002
5003 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005004 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005005 xmlParserInputPtr input = ctxt->input;
5006
5007 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005008 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005009 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5010 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005011 }
5012 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005013 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005014 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005015 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5016 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005017 return(-1);
5018 }
5019 while ((RAW == 0) && (ctxt->inputNr > 1))
5020 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005021 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5023 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005024 }
5025 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005026 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005027 SKIP(5);
5028 /*
5029 * Element must always be empty.
5030 */
5031 ret = XML_ELEMENT_TYPE_EMPTY;
5032 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5033 (NXT(2) == 'Y')) {
5034 SKIP(3);
5035 /*
5036 * Element is a generic container.
5037 */
5038 ret = XML_ELEMENT_TYPE_ANY;
5039 } else if (RAW == '(') {
5040 ret = xmlParseElementContentDecl(ctxt, name, &content);
5041 } else {
5042 /*
5043 * [ WFC: PEs in Internal Subset ] error handling.
5044 */
5045 if ((RAW == '%') && (ctxt->external == 0) &&
5046 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005047 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005048 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005049 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005050 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005051 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5052 }
Owen Taylor3473f882001-02-23 17:55:21 +00005053 return(-1);
5054 }
5055
5056 SKIP_BLANKS;
5057 /*
5058 * Pop-up of finished entities.
5059 */
5060 while ((RAW == 0) && (ctxt->inputNr > 1))
5061 xmlPopInput(ctxt);
5062 SKIP_BLANKS;
5063
5064 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005065 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005066 } else {
5067 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005068 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5069 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005070 }
5071
5072 NEXT;
5073 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5074 (ctxt->sax->elementDecl != NULL))
5075 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5076 content);
5077 }
5078 if (content != NULL) {
5079 xmlFreeElementContent(content);
5080 }
Owen Taylor3473f882001-02-23 17:55:21 +00005081 }
5082 return(ret);
5083}
5084
5085/**
Owen Taylor3473f882001-02-23 17:55:21 +00005086 * xmlParseConditionalSections
5087 * @ctxt: an XML parser context
5088 *
5089 * [61] conditionalSect ::= includeSect | ignoreSect
5090 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5091 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5092 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5093 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5094 */
5095
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005096static void
Owen Taylor3473f882001-02-23 17:55:21 +00005097xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5098 SKIP(3);
5099 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005100 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005101 SKIP(7);
5102 SKIP_BLANKS;
5103 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005104 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005105 } else {
5106 NEXT;
5107 }
5108 if (xmlParserDebugEntities) {
5109 if ((ctxt->input != NULL) && (ctxt->input->filename))
5110 xmlGenericError(xmlGenericErrorContext,
5111 "%s(%d): ", ctxt->input->filename,
5112 ctxt->input->line);
5113 xmlGenericError(xmlGenericErrorContext,
5114 "Entering INCLUDE Conditional Section\n");
5115 }
5116
5117 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5118 (NXT(2) != '>'))) {
5119 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005120 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005121
5122 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5123 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005124 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005125 NEXT;
5126 } else if (RAW == '%') {
5127 xmlParsePEReference(ctxt);
5128 } else
5129 xmlParseMarkupDecl(ctxt);
5130
5131 /*
5132 * Pop-up of finished entities.
5133 */
5134 while ((RAW == 0) && (ctxt->inputNr > 1))
5135 xmlPopInput(ctxt);
5136
Daniel Veillardfdc91562002-07-01 21:52:03 +00005137 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005138 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 break;
5140 }
5141 }
5142 if (xmlParserDebugEntities) {
5143 if ((ctxt->input != NULL) && (ctxt->input->filename))
5144 xmlGenericError(xmlGenericErrorContext,
5145 "%s(%d): ", ctxt->input->filename,
5146 ctxt->input->line);
5147 xmlGenericError(xmlGenericErrorContext,
5148 "Leaving INCLUDE Conditional Section\n");
5149 }
5150
Daniel Veillarda07050d2003-10-19 14:46:32 +00005151 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005152 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005153 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005154 int depth = 0;
5155
5156 SKIP(6);
5157 SKIP_BLANKS;
5158 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005159 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005160 } else {
5161 NEXT;
5162 }
5163 if (xmlParserDebugEntities) {
5164 if ((ctxt->input != NULL) && (ctxt->input->filename))
5165 xmlGenericError(xmlGenericErrorContext,
5166 "%s(%d): ", ctxt->input->filename,
5167 ctxt->input->line);
5168 xmlGenericError(xmlGenericErrorContext,
5169 "Entering IGNORE Conditional Section\n");
5170 }
5171
5172 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005173 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005174 * But disable SAX event generating DTD building in the meantime
5175 */
5176 state = ctxt->disableSAX;
5177 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005178 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005179 ctxt->instate = XML_PARSER_IGNORE;
5180
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005181 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005182 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5183 depth++;
5184 SKIP(3);
5185 continue;
5186 }
5187 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5188 if (--depth >= 0) SKIP(3);
5189 continue;
5190 }
5191 NEXT;
5192 continue;
5193 }
5194
5195 ctxt->disableSAX = state;
5196 ctxt->instate = instate;
5197
5198 if (xmlParserDebugEntities) {
5199 if ((ctxt->input != NULL) && (ctxt->input->filename))
5200 xmlGenericError(xmlGenericErrorContext,
5201 "%s(%d): ", ctxt->input->filename,
5202 ctxt->input->line);
5203 xmlGenericError(xmlGenericErrorContext,
5204 "Leaving IGNORE Conditional Section\n");
5205 }
5206
5207 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005208 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 }
5210
5211 if (RAW == 0)
5212 SHRINK;
5213
5214 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005215 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005216 } else {
5217 SKIP(3);
5218 }
5219}
5220
5221/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005222 * xmlParseMarkupDecl:
5223 * @ctxt: an XML parser context
5224 *
5225 * parse Markup declarations
5226 *
5227 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5228 * NotationDecl | PI | Comment
5229 *
5230 * [ VC: Proper Declaration/PE Nesting ]
5231 * Parameter-entity replacement text must be properly nested with
5232 * markup declarations. That is to say, if either the first character
5233 * or the last character of a markup declaration (markupdecl above) is
5234 * contained in the replacement text for a parameter-entity reference,
5235 * both must be contained in the same replacement text.
5236 *
5237 * [ WFC: PEs in Internal Subset ]
5238 * In the internal DTD subset, parameter-entity references can occur
5239 * only where markup declarations can occur, not within markup declarations.
5240 * (This does not apply to references that occur in external parameter
5241 * entities or to the external subset.)
5242 */
5243void
5244xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5245 GROW;
5246 xmlParseElementDecl(ctxt);
5247 xmlParseAttributeListDecl(ctxt);
5248 xmlParseEntityDecl(ctxt);
5249 xmlParseNotationDecl(ctxt);
5250 xmlParsePI(ctxt);
5251 xmlParseComment(ctxt);
5252 /*
5253 * This is only for internal subset. On external entities,
5254 * the replacement is done before parsing stage
5255 */
5256 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5257 xmlParsePEReference(ctxt);
5258
5259 /*
5260 * Conditional sections are allowed from entities included
5261 * by PE References in the internal subset.
5262 */
5263 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5264 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5265 xmlParseConditionalSections(ctxt);
5266 }
5267 }
5268
5269 ctxt->instate = XML_PARSER_DTD;
5270}
5271
5272/**
5273 * xmlParseTextDecl:
5274 * @ctxt: an XML parser context
5275 *
5276 * parse an XML declaration header for external entities
5277 *
5278 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5279 *
5280 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5281 */
5282
5283void
5284xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5285 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005286 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005287
5288 /*
5289 * We know that '<?xml' is here.
5290 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005291 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005292 SKIP(5);
5293 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005294 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005295 return;
5296 }
5297
William M. Brack76e95df2003-10-18 16:20:14 +00005298 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5300 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005301 }
5302 SKIP_BLANKS;
5303
5304 /*
5305 * We may have the VersionInfo here.
5306 */
5307 version = xmlParseVersionInfo(ctxt);
5308 if (version == NULL)
5309 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005310 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005311 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005312 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5313 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005314 }
5315 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005316 ctxt->input->version = version;
5317
5318 /*
5319 * We must have the encoding declaration
5320 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005321 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005322 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5323 /*
5324 * The XML REC instructs us to stop parsing right here
5325 */
5326 return;
5327 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005328 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5329 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5330 "Missing encoding in text declaration\n");
5331 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005332
5333 SKIP_BLANKS;
5334 if ((RAW == '?') && (NXT(1) == '>')) {
5335 SKIP(2);
5336 } else if (RAW == '>') {
5337 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005338 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005339 NEXT;
5340 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005341 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005342 MOVETO_ENDTAG(CUR_PTR);
5343 NEXT;
5344 }
5345}
5346
5347/**
Owen Taylor3473f882001-02-23 17:55:21 +00005348 * xmlParseExternalSubset:
5349 * @ctxt: an XML parser context
5350 * @ExternalID: the external identifier
5351 * @SystemID: the system identifier (or URL)
5352 *
5353 * parse Markup declarations from an external subset
5354 *
5355 * [30] extSubset ::= textDecl? extSubsetDecl
5356 *
5357 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5358 */
5359void
5360xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5361 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005362 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005363 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005364 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005365 xmlParseTextDecl(ctxt);
5366 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5367 /*
5368 * The XML REC instructs us to stop parsing right here
5369 */
5370 ctxt->instate = XML_PARSER_EOF;
5371 return;
5372 }
5373 }
5374 if (ctxt->myDoc == NULL) {
5375 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5376 }
5377 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5378 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5379
5380 ctxt->instate = XML_PARSER_DTD;
5381 ctxt->external = 1;
5382 while (((RAW == '<') && (NXT(1) == '?')) ||
5383 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005384 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005385 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005386 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005387
5388 GROW;
5389 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5390 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005391 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005392 NEXT;
5393 } else if (RAW == '%') {
5394 xmlParsePEReference(ctxt);
5395 } else
5396 xmlParseMarkupDecl(ctxt);
5397
5398 /*
5399 * Pop-up of finished entities.
5400 */
5401 while ((RAW == 0) && (ctxt->inputNr > 1))
5402 xmlPopInput(ctxt);
5403
Daniel Veillardfdc91562002-07-01 21:52:03 +00005404 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005405 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005406 break;
5407 }
5408 }
5409
5410 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005411 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005412 }
5413
5414}
5415
5416/**
5417 * xmlParseReference:
5418 * @ctxt: an XML parser context
5419 *
5420 * parse and handle entity references in content, depending on the SAX
5421 * interface, this may end-up in a call to character() if this is a
5422 * CharRef, a predefined entity, if there is no reference() callback.
5423 * or if the parser was asked to switch to that mode.
5424 *
5425 * [67] Reference ::= EntityRef | CharRef
5426 */
5427void
5428xmlParseReference(xmlParserCtxtPtr ctxt) {
5429 xmlEntityPtr ent;
5430 xmlChar *val;
5431 if (RAW != '&') return;
5432
5433 if (NXT(1) == '#') {
5434 int i = 0;
5435 xmlChar out[10];
5436 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005437 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005438
5439 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5440 /*
5441 * So we are using non-UTF-8 buffers
5442 * Check that the char fit on 8bits, if not
5443 * generate a CharRef.
5444 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005445 if (value <= 0xFF) {
5446 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005447 out[1] = 0;
5448 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5449 (!ctxt->disableSAX))
5450 ctxt->sax->characters(ctxt->userData, out, 1);
5451 } else {
5452 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005453 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005454 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005455 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005456 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5457 (!ctxt->disableSAX))
5458 ctxt->sax->reference(ctxt->userData, out);
5459 }
5460 } else {
5461 /*
5462 * Just encode the value in UTF-8
5463 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005464 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005465 out[i] = 0;
5466 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5467 (!ctxt->disableSAX))
5468 ctxt->sax->characters(ctxt->userData, out, i);
5469 }
5470 } else {
5471 ent = xmlParseEntityRef(ctxt);
5472 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005473 if (!ctxt->wellFormed)
5474 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005475 if ((ent->name != NULL) &&
5476 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5477 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005478 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005479
5480
5481 /*
5482 * The first reference to the entity trigger a parsing phase
5483 * where the ent->children is filled with the result from
5484 * the parsing.
5485 */
5486 if (ent->children == NULL) {
5487 xmlChar *value;
5488 value = ent->content;
5489
5490 /*
5491 * Check that this entity is well formed
5492 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005493 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005494 (value[1] == 0) && (value[0] == '<') &&
5495 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5496 /*
5497 * DONE: get definite answer on this !!!
5498 * Lots of entity decls are used to declare a single
5499 * char
5500 * <!ENTITY lt "<">
5501 * Which seems to be valid since
5502 * 2.4: The ampersand character (&) and the left angle
5503 * bracket (<) may appear in their literal form only
5504 * when used ... They are also legal within the literal
5505 * entity value of an internal entity declaration;i
5506 * see "4.3.2 Well-Formed Parsed Entities".
5507 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5508 * Looking at the OASIS test suite and James Clark
5509 * tests, this is broken. However the XML REC uses
5510 * it. Is the XML REC not well-formed ????
5511 * This is a hack to avoid this problem
5512 *
5513 * ANSWER: since lt gt amp .. are already defined,
5514 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005515 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005516 * is lousy but acceptable.
5517 */
5518 list = xmlNewDocText(ctxt->myDoc, value);
5519 if (list != NULL) {
5520 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5521 (ent->children == NULL)) {
5522 ent->children = list;
5523 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005524 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 list->parent = (xmlNodePtr) ent;
5526 } else {
5527 xmlFreeNodeList(list);
5528 }
5529 } else if (list != NULL) {
5530 xmlFreeNodeList(list);
5531 }
5532 } else {
5533 /*
5534 * 4.3.2: An internal general parsed entity is well-formed
5535 * if its replacement text matches the production labeled
5536 * content.
5537 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005538
5539 void *user_data;
5540 /*
5541 * This is a bit hackish but this seems the best
5542 * way to make sure both SAX and DOM entity support
5543 * behaves okay.
5544 */
5545 if (ctxt->userData == ctxt)
5546 user_data = NULL;
5547 else
5548 user_data = ctxt->userData;
5549
Owen Taylor3473f882001-02-23 17:55:21 +00005550 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5551 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005552 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5553 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 ctxt->depth--;
5555 } else if (ent->etype ==
5556 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5557 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005558 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005559 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005560 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 ctxt->depth--;
5562 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005563 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005564 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5565 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005566 }
5567 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005568 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005569 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005570 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005571 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5572 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005573 (ent->children == NULL)) {
5574 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005575 if (ctxt->replaceEntities) {
5576 /*
5577 * Prune it directly in the generated document
5578 * except for single text nodes.
5579 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005580 if (((list->type == XML_TEXT_NODE) &&
5581 (list->next == NULL)) ||
5582 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005583 list->parent = (xmlNodePtr) ent;
5584 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005585 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005586 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005587 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005588 while (list != NULL) {
5589 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005590 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005591 if (list->next == NULL)
5592 ent->last = list;
5593 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005594 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005595 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005596#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005597 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5598 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005599#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005600 }
5601 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005602 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005603 while (list != NULL) {
5604 list->parent = (xmlNodePtr) ent;
5605 if (list->next == NULL)
5606 ent->last = list;
5607 list = list->next;
5608 }
Owen Taylor3473f882001-02-23 17:55:21 +00005609 }
5610 } else {
5611 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005612 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005613 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005614 } else if ((ret != XML_ERR_OK) &&
5615 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005616 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005617 } else if (list != NULL) {
5618 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005619 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005620 }
5621 }
5622 }
5623 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5624 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5625 /*
5626 * Create a node.
5627 */
5628 ctxt->sax->reference(ctxt->userData, ent->name);
5629 return;
5630 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005631 /*
5632 * There is a problem on the handling of _private for entities
5633 * (bug 155816): Should we copy the content of the field from
5634 * the entity (possibly overwriting some value set by the user
5635 * when a copy is created), should we leave it alone, or should
5636 * we try to take care of different situations? The problem
5637 * is exacerbated by the usage of this field by the xmlReader.
5638 * To fix this bug, we look at _private on the created node
5639 * and, if it's NULL, we copy in whatever was in the entity.
5640 * If it's not NULL we leave it alone. This is somewhat of a
5641 * hack - maybe we should have further tests to determine
5642 * what to do.
5643 */
Owen Taylor3473f882001-02-23 17:55:21 +00005644 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5645 /*
5646 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005647 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005648 * In the first occurrence list contains the replacement.
5649 * progressive == 2 means we are operating on the Reader
5650 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005651 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005652 if (((list == NULL) && (ent->owner == 0)) ||
5653 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005654 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005655
5656 /*
5657 * when operating on a reader, the entities definitions
5658 * are always owning the entities subtree.
5659 if (ctxt->parseMode == XML_PARSE_READER)
5660 ent->owner = 1;
5661 */
5662
Daniel Veillard62f313b2001-07-04 19:49:14 +00005663 cur = ent->children;
5664 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005665 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005666 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005667 if (nw->_private == NULL)
5668 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005669 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005670 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005671 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005672 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005673 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005674 if (cur == ent->last) {
5675 /*
5676 * needed to detect some strange empty
5677 * node cases in the reader tests
5678 */
5679 if ((ctxt->parseMode == XML_PARSE_READER) &&
5680 (nw->type == XML_ELEMENT_NODE) &&
5681 (nw->children == NULL))
5682 nw->extra = 1;
5683
Daniel Veillard62f313b2001-07-04 19:49:14 +00005684 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005685 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005686 cur = cur->next;
5687 }
Daniel Veillard81273902003-09-30 00:43:48 +00005688#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005689 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005690 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005691#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005692 } else if (list == NULL) {
5693 xmlNodePtr nw = NULL, cur, next, last,
5694 firstChild = NULL;
5695 /*
5696 * Copy the entity child list and make it the new
5697 * entity child list. The goal is to make sure any
5698 * ID or REF referenced will be the one from the
5699 * document content and not the entity copy.
5700 */
5701 cur = ent->children;
5702 ent->children = NULL;
5703 last = ent->last;
5704 ent->last = NULL;
5705 while (cur != NULL) {
5706 next = cur->next;
5707 cur->next = NULL;
5708 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005709 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005710 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005711 if (nw->_private == NULL)
5712 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005713 if (firstChild == NULL){
5714 firstChild = cur;
5715 }
5716 xmlAddChild((xmlNodePtr) ent, nw);
5717 xmlAddChild(ctxt->node, cur);
5718 }
5719 if (cur == last)
5720 break;
5721 cur = next;
5722 }
5723 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005724#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005725 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5726 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005727#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005728 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005729 const xmlChar *nbktext;
5730
Daniel Veillard62f313b2001-07-04 19:49:14 +00005731 /*
5732 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005733 * node with a possible previous text one which
5734 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005735 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005736 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5737 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005738 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005739 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005740 if ((ent->last != ent->children) &&
5741 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005742 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005743 xmlAddChildList(ctxt->node, ent->children);
5744 }
5745
Owen Taylor3473f882001-02-23 17:55:21 +00005746 /*
5747 * This is to avoid a nasty side effect, see
5748 * characters() in SAX.c
5749 */
5750 ctxt->nodemem = 0;
5751 ctxt->nodelen = 0;
5752 return;
5753 } else {
5754 /*
5755 * Probably running in SAX mode
5756 */
5757 xmlParserInputPtr input;
5758
5759 input = xmlNewEntityInputStream(ctxt, ent);
5760 xmlPushInput(ctxt, input);
5761 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005762 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5763 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005764 xmlParseTextDecl(ctxt);
5765 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5766 /*
5767 * The XML REC instructs us to stop parsing right here
5768 */
5769 ctxt->instate = XML_PARSER_EOF;
5770 return;
5771 }
5772 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005773 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5774 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 }
5777 return;
5778 }
5779 }
5780 } else {
5781 val = ent->content;
5782 if (val == NULL) return;
5783 /*
5784 * inline the entity.
5785 */
5786 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5787 (!ctxt->disableSAX))
5788 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5789 }
5790 }
5791}
5792
5793/**
5794 * xmlParseEntityRef:
5795 * @ctxt: an XML parser context
5796 *
5797 * parse ENTITY references declarations
5798 *
5799 * [68] EntityRef ::= '&' Name ';'
5800 *
5801 * [ WFC: Entity Declared ]
5802 * In a document without any DTD, a document with only an internal DTD
5803 * subset which contains no parameter entity references, or a document
5804 * with "standalone='yes'", the Name given in the entity reference
5805 * must match that in an entity declaration, except that well-formed
5806 * documents need not declare any of the following entities: amp, lt,
5807 * gt, apos, quot. The declaration of a parameter entity must precede
5808 * any reference to it. Similarly, the declaration of a general entity
5809 * must precede any reference to it which appears in a default value in an
5810 * attribute-list declaration. Note that if entities are declared in the
5811 * external subset or in external parameter entities, a non-validating
5812 * processor is not obligated to read and process their declarations;
5813 * for such documents, the rule that an entity must be declared is a
5814 * well-formedness constraint only if standalone='yes'.
5815 *
5816 * [ WFC: Parsed Entity ]
5817 * An entity reference must not contain the name of an unparsed entity
5818 *
5819 * Returns the xmlEntityPtr if found, or NULL otherwise.
5820 */
5821xmlEntityPtr
5822xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005823 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005824 xmlEntityPtr ent = NULL;
5825
5826 GROW;
5827
5828 if (RAW == '&') {
5829 NEXT;
5830 name = xmlParseName(ctxt);
5831 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005832 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5833 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005834 } else {
5835 if (RAW == ';') {
5836 NEXT;
5837 /*
5838 * Ask first SAX for entity resolution, otherwise try the
5839 * predefined set.
5840 */
5841 if (ctxt->sax != NULL) {
5842 if (ctxt->sax->getEntity != NULL)
5843 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005844 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005845 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005846 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5847 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005848 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005849 }
Owen Taylor3473f882001-02-23 17:55:21 +00005850 }
5851 /*
5852 * [ WFC: Entity Declared ]
5853 * In a document without any DTD, a document with only an
5854 * internal DTD subset which contains no parameter entity
5855 * references, or a document with "standalone='yes'", the
5856 * Name given in the entity reference must match that in an
5857 * entity declaration, except that well-formed documents
5858 * need not declare any of the following entities: amp, lt,
5859 * gt, apos, quot.
5860 * The declaration of a parameter entity must precede any
5861 * reference to it.
5862 * Similarly, the declaration of a general entity must
5863 * precede any reference to it which appears in a default
5864 * value in an attribute-list declaration. Note that if
5865 * entities are declared in the external subset or in
5866 * external parameter entities, a non-validating processor
5867 * is not obligated to read and process their declarations;
5868 * for such documents, the rule that an entity must be
5869 * declared is a well-formedness constraint only if
5870 * standalone='yes'.
5871 */
5872 if (ent == NULL) {
5873 if ((ctxt->standalone == 1) ||
5874 ((ctxt->hasExternalSubset == 0) &&
5875 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005876 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005877 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005878 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005879 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005880 "Entity '%s' not defined\n", name);
5881 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005882 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884
5885 /*
5886 * [ WFC: Parsed Entity ]
5887 * An entity reference must not contain the name of an
5888 * unparsed entity
5889 */
5890 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005891 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005892 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005893 }
5894
5895 /*
5896 * [ WFC: No External Entity References ]
5897 * Attribute values cannot contain direct or indirect
5898 * entity references to external entities.
5899 */
5900 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5901 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005902 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5903 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005904 }
5905 /*
5906 * [ WFC: No < in Attribute Values ]
5907 * The replacement text of any entity referred to directly or
5908 * indirectly in an attribute value (other than "&lt;") must
5909 * not contain a <.
5910 */
5911 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5912 (ent != NULL) &&
5913 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5914 (ent->content != NULL) &&
5915 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005916 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005917 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 }
5919
5920 /*
5921 * Internal check, no parameter entities here ...
5922 */
5923 else {
5924 switch (ent->etype) {
5925 case XML_INTERNAL_PARAMETER_ENTITY:
5926 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005927 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5928 "Attempt to reference the parameter entity '%s'\n",
5929 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005930 break;
5931 default:
5932 break;
5933 }
5934 }
5935
5936 /*
5937 * [ WFC: No Recursion ]
5938 * A parsed entity must not contain a recursive reference
5939 * to itself, either directly or indirectly.
5940 * Done somewhere else
5941 */
5942
5943 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005944 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005945 }
Owen Taylor3473f882001-02-23 17:55:21 +00005946 }
5947 }
5948 return(ent);
5949}
5950
5951/**
5952 * xmlParseStringEntityRef:
5953 * @ctxt: an XML parser context
5954 * @str: a pointer to an index in the string
5955 *
5956 * parse ENTITY references declarations, but this version parses it from
5957 * a string value.
5958 *
5959 * [68] EntityRef ::= '&' Name ';'
5960 *
5961 * [ WFC: Entity Declared ]
5962 * In a document without any DTD, a document with only an internal DTD
5963 * subset which contains no parameter entity references, or a document
5964 * with "standalone='yes'", the Name given in the entity reference
5965 * must match that in an entity declaration, except that well-formed
5966 * documents need not declare any of the following entities: amp, lt,
5967 * gt, apos, quot. The declaration of a parameter entity must precede
5968 * any reference to it. Similarly, the declaration of a general entity
5969 * must precede any reference to it which appears in a default value in an
5970 * attribute-list declaration. Note that if entities are declared in the
5971 * external subset or in external parameter entities, a non-validating
5972 * processor is not obligated to read and process their declarations;
5973 * for such documents, the rule that an entity must be declared is a
5974 * well-formedness constraint only if standalone='yes'.
5975 *
5976 * [ WFC: Parsed Entity ]
5977 * An entity reference must not contain the name of an unparsed entity
5978 *
5979 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5980 * is updated to the current location in the string.
5981 */
5982xmlEntityPtr
5983xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5984 xmlChar *name;
5985 const xmlChar *ptr;
5986 xmlChar cur;
5987 xmlEntityPtr ent = NULL;
5988
5989 if ((str == NULL) || (*str == NULL))
5990 return(NULL);
5991 ptr = *str;
5992 cur = *ptr;
5993 if (cur == '&') {
5994 ptr++;
5995 cur = *ptr;
5996 name = xmlParseStringName(ctxt, &ptr);
5997 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005998 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006000 } else {
6001 if (*ptr == ';') {
6002 ptr++;
6003 /*
6004 * Ask first SAX for entity resolution, otherwise try the
6005 * predefined set.
6006 */
6007 if (ctxt->sax != NULL) {
6008 if (ctxt->sax->getEntity != NULL)
6009 ent = ctxt->sax->getEntity(ctxt->userData, name);
6010 if (ent == NULL)
6011 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006012 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006013 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006014 }
Owen Taylor3473f882001-02-23 17:55:21 +00006015 }
6016 /*
6017 * [ WFC: Entity Declared ]
6018 * In a document without any DTD, a document with only an
6019 * internal DTD subset which contains no parameter entity
6020 * references, or a document with "standalone='yes'", the
6021 * Name given in the entity reference must match that in an
6022 * entity declaration, except that well-formed documents
6023 * need not declare any of the following entities: amp, lt,
6024 * gt, apos, quot.
6025 * The declaration of a parameter entity must precede any
6026 * reference to it.
6027 * Similarly, the declaration of a general entity must
6028 * precede any reference to it which appears in a default
6029 * value in an attribute-list declaration. Note that if
6030 * entities are declared in the external subset or in
6031 * external parameter entities, a non-validating processor
6032 * is not obligated to read and process their declarations;
6033 * for such documents, the rule that an entity must be
6034 * declared is a well-formedness constraint only if
6035 * standalone='yes'.
6036 */
6037 if (ent == NULL) {
6038 if ((ctxt->standalone == 1) ||
6039 ((ctxt->hasExternalSubset == 0) &&
6040 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006041 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006042 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006043 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006044 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006045 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006046 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006047 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006048 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006049 }
6050
6051 /*
6052 * [ WFC: Parsed Entity ]
6053 * An entity reference must not contain the name of an
6054 * unparsed entity
6055 */
6056 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006057 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006058 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006059 }
6060
6061 /*
6062 * [ WFC: No External Entity References ]
6063 * Attribute values cannot contain direct or indirect
6064 * entity references to external entities.
6065 */
6066 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6067 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006068 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006069 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006070 }
6071 /*
6072 * [ WFC: No < in Attribute Values ]
6073 * The replacement text of any entity referred to directly or
6074 * indirectly in an attribute value (other than "&lt;") must
6075 * not contain a <.
6076 */
6077 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6078 (ent != NULL) &&
6079 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6080 (ent->content != NULL) &&
6081 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006082 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6083 "'<' in entity '%s' is not allowed in attributes values\n",
6084 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006085 }
6086
6087 /*
6088 * Internal check, no parameter entities here ...
6089 */
6090 else {
6091 switch (ent->etype) {
6092 case XML_INTERNAL_PARAMETER_ENTITY:
6093 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006094 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6095 "Attempt to reference the parameter entity '%s'\n",
6096 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006097 break;
6098 default:
6099 break;
6100 }
6101 }
6102
6103 /*
6104 * [ WFC: No Recursion ]
6105 * A parsed entity must not contain a recursive reference
6106 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006107 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006108 */
6109
6110 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006111 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006112 }
6113 xmlFree(name);
6114 }
6115 }
6116 *str = ptr;
6117 return(ent);
6118}
6119
6120/**
6121 * xmlParsePEReference:
6122 * @ctxt: an XML parser context
6123 *
6124 * parse PEReference declarations
6125 * The entity content is handled directly by pushing it's content as
6126 * a new input stream.
6127 *
6128 * [69] PEReference ::= '%' Name ';'
6129 *
6130 * [ WFC: No Recursion ]
6131 * A parsed entity must not contain a recursive
6132 * reference to itself, either directly or indirectly.
6133 *
6134 * [ WFC: Entity Declared ]
6135 * In a document without any DTD, a document with only an internal DTD
6136 * subset which contains no parameter entity references, or a document
6137 * with "standalone='yes'", ... ... The declaration of a parameter
6138 * entity must precede any reference to it...
6139 *
6140 * [ VC: Entity Declared ]
6141 * In a document with an external subset or external parameter entities
6142 * with "standalone='no'", ... ... The declaration of a parameter entity
6143 * must precede any reference to it...
6144 *
6145 * [ WFC: In DTD ]
6146 * Parameter-entity references may only appear in the DTD.
6147 * NOTE: misleading but this is handled.
6148 */
6149void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006150xmlParsePEReference(xmlParserCtxtPtr ctxt)
6151{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006152 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006153 xmlEntityPtr entity = NULL;
6154 xmlParserInputPtr input;
6155
6156 if (RAW == '%') {
6157 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006158 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006159 if (name == NULL) {
6160 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6161 "xmlParsePEReference: no name\n");
6162 } else {
6163 if (RAW == ';') {
6164 NEXT;
6165 if ((ctxt->sax != NULL) &&
6166 (ctxt->sax->getParameterEntity != NULL))
6167 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6168 name);
6169 if (entity == NULL) {
6170 /*
6171 * [ WFC: Entity Declared ]
6172 * In a document without any DTD, a document with only an
6173 * internal DTD subset which contains no parameter entity
6174 * references, or a document with "standalone='yes'", ...
6175 * ... The declaration of a parameter entity must precede
6176 * any reference to it...
6177 */
6178 if ((ctxt->standalone == 1) ||
6179 ((ctxt->hasExternalSubset == 0) &&
6180 (ctxt->hasPErefs == 0))) {
6181 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6182 "PEReference: %%%s; not found\n",
6183 name);
6184 } else {
6185 /*
6186 * [ VC: Entity Declared ]
6187 * In a document with an external subset or external
6188 * parameter entities with "standalone='no'", ...
6189 * ... The declaration of a parameter entity must
6190 * precede any reference to it...
6191 */
6192 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6193 "PEReference: %%%s; not found\n",
6194 name, NULL);
6195 ctxt->valid = 0;
6196 }
6197 } else {
6198 /*
6199 * Internal checking in case the entity quest barfed
6200 */
6201 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6202 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6203 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6204 "Internal: %%%s; is not a parameter entity\n",
6205 name, NULL);
6206 } else if (ctxt->input->free != deallocblankswrapper) {
6207 input =
6208 xmlNewBlanksWrapperInputStream(ctxt, entity);
6209 xmlPushInput(ctxt, input);
6210 } else {
6211 /*
6212 * TODO !!!
6213 * handle the extra spaces added before and after
6214 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6215 */
6216 input = xmlNewEntityInputStream(ctxt, entity);
6217 xmlPushInput(ctxt, input);
6218 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006219 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006220 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006221 xmlParseTextDecl(ctxt);
6222 if (ctxt->errNo ==
6223 XML_ERR_UNSUPPORTED_ENCODING) {
6224 /*
6225 * The XML REC instructs us to stop parsing
6226 * right here
6227 */
6228 ctxt->instate = XML_PARSER_EOF;
6229 return;
6230 }
6231 }
6232 }
6233 }
6234 ctxt->hasPErefs = 1;
6235 } else {
6236 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6237 }
6238 }
Owen Taylor3473f882001-02-23 17:55:21 +00006239 }
6240}
6241
6242/**
6243 * xmlParseStringPEReference:
6244 * @ctxt: an XML parser context
6245 * @str: a pointer to an index in the string
6246 *
6247 * parse PEReference declarations
6248 *
6249 * [69] PEReference ::= '%' Name ';'
6250 *
6251 * [ WFC: No Recursion ]
6252 * A parsed entity must not contain a recursive
6253 * reference to itself, either directly or indirectly.
6254 *
6255 * [ WFC: Entity Declared ]
6256 * In a document without any DTD, a document with only an internal DTD
6257 * subset which contains no parameter entity references, or a document
6258 * with "standalone='yes'", ... ... The declaration of a parameter
6259 * entity must precede any reference to it...
6260 *
6261 * [ VC: Entity Declared ]
6262 * In a document with an external subset or external parameter entities
6263 * with "standalone='no'", ... ... The declaration of a parameter entity
6264 * must precede any reference to it...
6265 *
6266 * [ WFC: In DTD ]
6267 * Parameter-entity references may only appear in the DTD.
6268 * NOTE: misleading but this is handled.
6269 *
6270 * Returns the string of the entity content.
6271 * str is updated to the current value of the index
6272 */
6273xmlEntityPtr
6274xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6275 const xmlChar *ptr;
6276 xmlChar cur;
6277 xmlChar *name;
6278 xmlEntityPtr entity = NULL;
6279
6280 if ((str == NULL) || (*str == NULL)) return(NULL);
6281 ptr = *str;
6282 cur = *ptr;
6283 if (cur == '%') {
6284 ptr++;
6285 cur = *ptr;
6286 name = xmlParseStringName(ctxt, &ptr);
6287 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6289 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006290 } else {
6291 cur = *ptr;
6292 if (cur == ';') {
6293 ptr++;
6294 cur = *ptr;
6295 if ((ctxt->sax != NULL) &&
6296 (ctxt->sax->getParameterEntity != NULL))
6297 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6298 name);
6299 if (entity == NULL) {
6300 /*
6301 * [ WFC: Entity Declared ]
6302 * In a document without any DTD, a document with only an
6303 * internal DTD subset which contains no parameter entity
6304 * references, or a document with "standalone='yes'", ...
6305 * ... The declaration of a parameter entity must precede
6306 * any reference to it...
6307 */
6308 if ((ctxt->standalone == 1) ||
6309 ((ctxt->hasExternalSubset == 0) &&
6310 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006311 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006312 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 } else {
6314 /*
6315 * [ VC: Entity Declared ]
6316 * In a document with an external subset or external
6317 * parameter entities with "standalone='no'", ...
6318 * ... The declaration of a parameter entity must
6319 * precede any reference to it...
6320 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006321 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6322 "PEReference: %%%s; not found\n",
6323 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006324 ctxt->valid = 0;
6325 }
6326 } else {
6327 /*
6328 * Internal checking in case the entity quest barfed
6329 */
6330 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6331 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006332 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6333 "%%%s; is not a parameter entity\n",
6334 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006335 }
6336 }
6337 ctxt->hasPErefs = 1;
6338 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006339 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006340 }
6341 xmlFree(name);
6342 }
6343 }
6344 *str = ptr;
6345 return(entity);
6346}
6347
6348/**
6349 * xmlParseDocTypeDecl:
6350 * @ctxt: an XML parser context
6351 *
6352 * parse a DOCTYPE declaration
6353 *
6354 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6355 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6356 *
6357 * [ VC: Root Element Type ]
6358 * The Name in the document type declaration must match the element
6359 * type of the root element.
6360 */
6361
6362void
6363xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006364 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006365 xmlChar *ExternalID = NULL;
6366 xmlChar *URI = NULL;
6367
6368 /*
6369 * We know that '<!DOCTYPE' has been detected.
6370 */
6371 SKIP(9);
6372
6373 SKIP_BLANKS;
6374
6375 /*
6376 * Parse the DOCTYPE name.
6377 */
6378 name = xmlParseName(ctxt);
6379 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006380 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6381 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006382 }
6383 ctxt->intSubName = name;
6384
6385 SKIP_BLANKS;
6386
6387 /*
6388 * Check for SystemID and ExternalID
6389 */
6390 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6391
6392 if ((URI != NULL) || (ExternalID != NULL)) {
6393 ctxt->hasExternalSubset = 1;
6394 }
6395 ctxt->extSubURI = URI;
6396 ctxt->extSubSystem = ExternalID;
6397
6398 SKIP_BLANKS;
6399
6400 /*
6401 * Create and update the internal subset.
6402 */
6403 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6404 (!ctxt->disableSAX))
6405 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6406
6407 /*
6408 * Is there any internal subset declarations ?
6409 * they are handled separately in xmlParseInternalSubset()
6410 */
6411 if (RAW == '[')
6412 return;
6413
6414 /*
6415 * We should be at the end of the DOCTYPE declaration.
6416 */
6417 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006418 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006419 }
6420 NEXT;
6421}
6422
6423/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006424 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006425 * @ctxt: an XML parser context
6426 *
6427 * parse the internal subset declaration
6428 *
6429 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6430 */
6431
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006432static void
Owen Taylor3473f882001-02-23 17:55:21 +00006433xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6434 /*
6435 * Is there any DTD definition ?
6436 */
6437 if (RAW == '[') {
6438 ctxt->instate = XML_PARSER_DTD;
6439 NEXT;
6440 /*
6441 * Parse the succession of Markup declarations and
6442 * PEReferences.
6443 * Subsequence (markupdecl | PEReference | S)*
6444 */
6445 while (RAW != ']') {
6446 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006447 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006448
6449 SKIP_BLANKS;
6450 xmlParseMarkupDecl(ctxt);
6451 xmlParsePEReference(ctxt);
6452
6453 /*
6454 * Pop-up of finished entities.
6455 */
6456 while ((RAW == 0) && (ctxt->inputNr > 1))
6457 xmlPopInput(ctxt);
6458
6459 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006460 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006461 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006462 break;
6463 }
6464 }
6465 if (RAW == ']') {
6466 NEXT;
6467 SKIP_BLANKS;
6468 }
6469 }
6470
6471 /*
6472 * We should be at the end of the DOCTYPE declaration.
6473 */
6474 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006475 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006476 }
6477 NEXT;
6478}
6479
Daniel Veillard81273902003-09-30 00:43:48 +00006480#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006481/**
6482 * xmlParseAttribute:
6483 * @ctxt: an XML parser context
6484 * @value: a xmlChar ** used to store the value of the attribute
6485 *
6486 * parse an attribute
6487 *
6488 * [41] Attribute ::= Name Eq AttValue
6489 *
6490 * [ WFC: No External Entity References ]
6491 * Attribute values cannot contain direct or indirect entity references
6492 * to external entities.
6493 *
6494 * [ WFC: No < in Attribute Values ]
6495 * The replacement text of any entity referred to directly or indirectly in
6496 * an attribute value (other than "&lt;") must not contain a <.
6497 *
6498 * [ VC: Attribute Value Type ]
6499 * The attribute must have been declared; the value must be of the type
6500 * declared for it.
6501 *
6502 * [25] Eq ::= S? '=' S?
6503 *
6504 * With namespace:
6505 *
6506 * [NS 11] Attribute ::= QName Eq AttValue
6507 *
6508 * Also the case QName == xmlns:??? is handled independently as a namespace
6509 * definition.
6510 *
6511 * Returns the attribute name, and the value in *value.
6512 */
6513
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006514const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006515xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006516 const xmlChar *name;
6517 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006518
6519 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006520 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006521 name = xmlParseName(ctxt);
6522 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006523 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006524 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006525 return(NULL);
6526 }
6527
6528 /*
6529 * read the value
6530 */
6531 SKIP_BLANKS;
6532 if (RAW == '=') {
6533 NEXT;
6534 SKIP_BLANKS;
6535 val = xmlParseAttValue(ctxt);
6536 ctxt->instate = XML_PARSER_CONTENT;
6537 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006538 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006539 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006540 return(NULL);
6541 }
6542
6543 /*
6544 * Check that xml:lang conforms to the specification
6545 * No more registered as an error, just generate a warning now
6546 * since this was deprecated in XML second edition
6547 */
6548 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6549 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006550 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6551 "Malformed value for xml:lang : %s\n",
6552 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006553 }
6554 }
6555
6556 /*
6557 * Check that xml:space conforms to the specification
6558 */
6559 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6560 if (xmlStrEqual(val, BAD_CAST "default"))
6561 *(ctxt->space) = 0;
6562 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6563 *(ctxt->space) = 1;
6564 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006565 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006566"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006567 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 }
6569 }
6570
6571 *value = val;
6572 return(name);
6573}
6574
6575/**
6576 * xmlParseStartTag:
6577 * @ctxt: an XML parser context
6578 *
6579 * parse a start of tag either for rule element or
6580 * EmptyElement. In both case we don't parse the tag closing chars.
6581 *
6582 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6583 *
6584 * [ WFC: Unique Att Spec ]
6585 * No attribute name may appear more than once in the same start-tag or
6586 * empty-element tag.
6587 *
6588 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6589 *
6590 * [ WFC: Unique Att Spec ]
6591 * No attribute name may appear more than once in the same start-tag or
6592 * empty-element tag.
6593 *
6594 * With namespace:
6595 *
6596 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6597 *
6598 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6599 *
6600 * Returns the element name parsed
6601 */
6602
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006603const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006604xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006605 const xmlChar *name;
6606 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006607 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006608 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006609 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006610 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006611 int i;
6612
6613 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006614 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006615
6616 name = xmlParseName(ctxt);
6617 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006618 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006619 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006620 return(NULL);
6621 }
6622
6623 /*
6624 * Now parse the attributes, it ends up with the ending
6625 *
6626 * (S Attribute)* S?
6627 */
6628 SKIP_BLANKS;
6629 GROW;
6630
Daniel Veillard21a0f912001-02-25 19:54:14 +00006631 while ((RAW != '>') &&
6632 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006633 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006634 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006635 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006636
6637 attname = xmlParseAttribute(ctxt, &attvalue);
6638 if ((attname != NULL) && (attvalue != NULL)) {
6639 /*
6640 * [ WFC: Unique Att Spec ]
6641 * No attribute name may appear more than once in the same
6642 * start-tag or empty-element tag.
6643 */
6644 for (i = 0; i < nbatts;i += 2) {
6645 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006646 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 xmlFree(attvalue);
6648 goto failed;
6649 }
6650 }
Owen Taylor3473f882001-02-23 17:55:21 +00006651 /*
6652 * Add the pair to atts
6653 */
6654 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006655 maxatts = 22; /* allow for 10 attrs by default */
6656 atts = (const xmlChar **)
6657 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006658 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006659 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006660 if (attvalue != NULL)
6661 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006662 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006664 ctxt->atts = atts;
6665 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006666 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006667 const xmlChar **n;
6668
Owen Taylor3473f882001-02-23 17:55:21 +00006669 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006670 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006671 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006672 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006673 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006674 if (attvalue != NULL)
6675 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006676 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006677 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006678 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006679 ctxt->atts = atts;
6680 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006681 }
6682 atts[nbatts++] = attname;
6683 atts[nbatts++] = attvalue;
6684 atts[nbatts] = NULL;
6685 atts[nbatts + 1] = NULL;
6686 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006687 if (attvalue != NULL)
6688 xmlFree(attvalue);
6689 }
6690
6691failed:
6692
Daniel Veillard3772de32002-12-17 10:31:45 +00006693 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006694 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6695 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006696 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6698 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006699 }
6700 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006701 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6702 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006703 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6704 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006705 break;
6706 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006707 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006708 GROW;
6709 }
6710
6711 /*
6712 * SAX: Start of Element !
6713 */
6714 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006715 (!ctxt->disableSAX)) {
6716 if (nbatts > 0)
6717 ctxt->sax->startElement(ctxt->userData, name, atts);
6718 else
6719 ctxt->sax->startElement(ctxt->userData, name, NULL);
6720 }
Owen Taylor3473f882001-02-23 17:55:21 +00006721
6722 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006723 /* Free only the content strings */
6724 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006725 if (atts[i] != NULL)
6726 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006727 }
6728 return(name);
6729}
6730
6731/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006732 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006733 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006734 * @line: line of the start tag
6735 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006736 *
6737 * parse an end of tag
6738 *
6739 * [42] ETag ::= '</' Name S? '>'
6740 *
6741 * With namespace
6742 *
6743 * [NS 9] ETag ::= '</' QName S? '>'
6744 */
6745
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006746static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006747xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006748 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006749
6750 GROW;
6751 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006752 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006753 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006754 return;
6755 }
6756 SKIP(2);
6757
Daniel Veillard46de64e2002-05-29 08:21:33 +00006758 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006759
6760 /*
6761 * We should definitely be at the ending "S? '>'" part
6762 */
6763 GROW;
6764 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006765 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006766 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006767 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006768 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006769
6770 /*
6771 * [ WFC: Element Type Match ]
6772 * The Name in an element's end-tag must match the element type in the
6773 * start-tag.
6774 *
6775 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006776 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006777 if (name == NULL) name = BAD_CAST "unparseable";
6778 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006779 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006780 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006781 }
6782
6783 /*
6784 * SAX: End of Tag
6785 */
6786 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6787 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006788 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006789
Daniel Veillarde57ec792003-09-10 10:50:59 +00006790 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006791 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006792 return;
6793}
6794
6795/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006796 * xmlParseEndTag:
6797 * @ctxt: an XML parser context
6798 *
6799 * parse an end of tag
6800 *
6801 * [42] ETag ::= '</' Name S? '>'
6802 *
6803 * With namespace
6804 *
6805 * [NS 9] ETag ::= '</' QName S? '>'
6806 */
6807
6808void
6809xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006810 xmlParseEndTag1(ctxt, 0);
6811}
Daniel Veillard81273902003-09-30 00:43:48 +00006812#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006813
6814/************************************************************************
6815 * *
6816 * SAX 2 specific operations *
6817 * *
6818 ************************************************************************/
6819
6820static const xmlChar *
6821xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6822 int len = 0, l;
6823 int c;
6824 int count = 0;
6825
6826 /*
6827 * Handler for more complex cases
6828 */
6829 GROW;
6830 c = CUR_CHAR(l);
6831 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006832 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006833 return(NULL);
6834 }
6835
6836 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006837 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006838 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006839 (IS_COMBINING(c)) ||
6840 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006841 if (count++ > 100) {
6842 count = 0;
6843 GROW;
6844 }
6845 len += l;
6846 NEXTL(l);
6847 c = CUR_CHAR(l);
6848 }
6849 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6850}
6851
6852/*
6853 * xmlGetNamespace:
6854 * @ctxt: an XML parser context
6855 * @prefix: the prefix to lookup
6856 *
6857 * Lookup the namespace name for the @prefix (which ca be NULL)
6858 * The prefix must come from the @ctxt->dict dictionnary
6859 *
6860 * Returns the namespace name or NULL if not bound
6861 */
6862static const xmlChar *
6863xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6864 int i;
6865
Daniel Veillarde57ec792003-09-10 10:50:59 +00006866 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006867 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006868 if (ctxt->nsTab[i] == prefix) {
6869 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6870 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006871 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006872 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006873 return(NULL);
6874}
6875
6876/**
6877 * xmlParseNCName:
6878 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006879 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006880 *
6881 * parse an XML name.
6882 *
6883 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6884 * CombiningChar | Extender
6885 *
6886 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6887 *
6888 * Returns the Name parsed or NULL
6889 */
6890
6891static const xmlChar *
6892xmlParseNCName(xmlParserCtxtPtr ctxt) {
6893 const xmlChar *in;
6894 const xmlChar *ret;
6895 int count = 0;
6896
6897 /*
6898 * Accelerator for simple ASCII names
6899 */
6900 in = ctxt->input->cur;
6901 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6902 ((*in >= 0x41) && (*in <= 0x5A)) ||
6903 (*in == '_')) {
6904 in++;
6905 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6906 ((*in >= 0x41) && (*in <= 0x5A)) ||
6907 ((*in >= 0x30) && (*in <= 0x39)) ||
6908 (*in == '_') || (*in == '-') ||
6909 (*in == '.'))
6910 in++;
6911 if ((*in > 0) && (*in < 0x80)) {
6912 count = in - ctxt->input->cur;
6913 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6914 ctxt->input->cur = in;
6915 ctxt->nbChars += count;
6916 ctxt->input->col += count;
6917 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006918 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006919 }
6920 return(ret);
6921 }
6922 }
6923 return(xmlParseNCNameComplex(ctxt));
6924}
6925
6926/**
6927 * xmlParseQName:
6928 * @ctxt: an XML parser context
6929 * @prefix: pointer to store the prefix part
6930 *
6931 * parse an XML Namespace QName
6932 *
6933 * [6] QName ::= (Prefix ':')? LocalPart
6934 * [7] Prefix ::= NCName
6935 * [8] LocalPart ::= NCName
6936 *
6937 * Returns the Name parsed or NULL
6938 */
6939
6940static const xmlChar *
6941xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6942 const xmlChar *l, *p;
6943
6944 GROW;
6945
6946 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006947 if (l == NULL) {
6948 if (CUR == ':') {
6949 l = xmlParseName(ctxt);
6950 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006951 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6952 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006953 *prefix = NULL;
6954 return(l);
6955 }
6956 }
6957 return(NULL);
6958 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006959 if (CUR == ':') {
6960 NEXT;
6961 p = l;
6962 l = xmlParseNCName(ctxt);
6963 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006964 xmlChar *tmp;
6965
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006966 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6967 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006968 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6969 p = xmlDictLookup(ctxt->dict, tmp, -1);
6970 if (tmp != NULL) xmlFree(tmp);
6971 *prefix = NULL;
6972 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006973 }
6974 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006975 xmlChar *tmp;
6976
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006977 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6978 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006979 NEXT;
6980 tmp = (xmlChar *) xmlParseName(ctxt);
6981 if (tmp != NULL) {
6982 tmp = xmlBuildQName(tmp, l, NULL, 0);
6983 l = xmlDictLookup(ctxt->dict, tmp, -1);
6984 if (tmp != NULL) xmlFree(tmp);
6985 *prefix = p;
6986 return(l);
6987 }
6988 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6989 l = xmlDictLookup(ctxt->dict, tmp, -1);
6990 if (tmp != NULL) xmlFree(tmp);
6991 *prefix = p;
6992 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006993 }
6994 *prefix = p;
6995 } else
6996 *prefix = NULL;
6997 return(l);
6998}
6999
7000/**
7001 * xmlParseQNameAndCompare:
7002 * @ctxt: an XML parser context
7003 * @name: the localname
7004 * @prefix: the prefix, if any.
7005 *
7006 * parse an XML name and compares for match
7007 * (specialized for endtag parsing)
7008 *
7009 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7010 * and the name for mismatch
7011 */
7012
7013static const xmlChar *
7014xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7015 xmlChar const *prefix) {
7016 const xmlChar *cmp = name;
7017 const xmlChar *in;
7018 const xmlChar *ret;
7019 const xmlChar *prefix2;
7020
7021 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7022
7023 GROW;
7024 in = ctxt->input->cur;
7025
7026 cmp = prefix;
7027 while (*in != 0 && *in == *cmp) {
7028 ++in;
7029 ++cmp;
7030 }
7031 if ((*cmp == 0) && (*in == ':')) {
7032 in++;
7033 cmp = name;
7034 while (*in != 0 && *in == *cmp) {
7035 ++in;
7036 ++cmp;
7037 }
William M. Brack76e95df2003-10-18 16:20:14 +00007038 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007039 /* success */
7040 ctxt->input->cur = in;
7041 return((const xmlChar*) 1);
7042 }
7043 }
7044 /*
7045 * all strings coms from the dictionary, equality can be done directly
7046 */
7047 ret = xmlParseQName (ctxt, &prefix2);
7048 if ((ret == name) && (prefix == prefix2))
7049 return((const xmlChar*) 1);
7050 return ret;
7051}
7052
7053/**
7054 * xmlParseAttValueInternal:
7055 * @ctxt: an XML parser context
7056 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007057 * @alloc: whether the attribute was reallocated as a new string
7058 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007059 *
7060 * parse a value for an attribute.
7061 * NOTE: if no normalization is needed, the routine will return pointers
7062 * directly from the data buffer.
7063 *
7064 * 3.3.3 Attribute-Value Normalization:
7065 * Before the value of an attribute is passed to the application or
7066 * checked for validity, the XML processor must normalize it as follows:
7067 * - a character reference is processed by appending the referenced
7068 * character to the attribute value
7069 * - an entity reference is processed by recursively processing the
7070 * replacement text of the entity
7071 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7072 * appending #x20 to the normalized value, except that only a single
7073 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7074 * parsed entity or the literal entity value of an internal parsed entity
7075 * - other characters are processed by appending them to the normalized value
7076 * If the declared value is not CDATA, then the XML processor must further
7077 * process the normalized attribute value by discarding any leading and
7078 * trailing space (#x20) characters, and by replacing sequences of space
7079 * (#x20) characters by a single space (#x20) character.
7080 * All attributes for which no declaration has been read should be treated
7081 * by a non-validating parser as if declared CDATA.
7082 *
7083 * Returns the AttValue parsed or NULL. The value has to be freed by the
7084 * caller if it was copied, this can be detected by val[*len] == 0.
7085 */
7086
7087static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007088xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7089 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007090{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007091 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007092 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007093 xmlChar *ret = NULL;
7094
7095 GROW;
7096 in = (xmlChar *) CUR_PTR;
7097 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007098 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007099 return (NULL);
7100 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007101 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007102
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007103 /*
7104 * try to handle in this routine the most common case where no
7105 * allocation of a new string is required and where content is
7106 * pure ASCII.
7107 */
7108 limit = *in++;
7109 end = ctxt->input->end;
7110 start = in;
7111 if (in >= end) {
7112 const xmlChar *oldbase = ctxt->input->base;
7113 GROW;
7114 if (oldbase != ctxt->input->base) {
7115 long delta = ctxt->input->base - oldbase;
7116 start = start + delta;
7117 in = in + delta;
7118 }
7119 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007120 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007121 if (normalize) {
7122 /*
7123 * Skip any leading spaces
7124 */
7125 while ((in < end) && (*in != limit) &&
7126 ((*in == 0x20) || (*in == 0x9) ||
7127 (*in == 0xA) || (*in == 0xD))) {
7128 in++;
7129 start = in;
7130 if (in >= end) {
7131 const xmlChar *oldbase = ctxt->input->base;
7132 GROW;
7133 if (oldbase != ctxt->input->base) {
7134 long delta = ctxt->input->base - oldbase;
7135 start = start + delta;
7136 in = in + delta;
7137 }
7138 end = ctxt->input->end;
7139 }
7140 }
7141 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7142 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7143 if ((*in++ == 0x20) && (*in == 0x20)) break;
7144 if (in >= end) {
7145 const xmlChar *oldbase = ctxt->input->base;
7146 GROW;
7147 if (oldbase != ctxt->input->base) {
7148 long delta = ctxt->input->base - oldbase;
7149 start = start + delta;
7150 in = in + delta;
7151 }
7152 end = ctxt->input->end;
7153 }
7154 }
7155 last = in;
7156 /*
7157 * skip the trailing blanks
7158 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007159 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007160 while ((in < end) && (*in != limit) &&
7161 ((*in == 0x20) || (*in == 0x9) ||
7162 (*in == 0xA) || (*in == 0xD))) {
7163 in++;
7164 if (in >= end) {
7165 const xmlChar *oldbase = ctxt->input->base;
7166 GROW;
7167 if (oldbase != ctxt->input->base) {
7168 long delta = ctxt->input->base - oldbase;
7169 start = start + delta;
7170 in = in + delta;
7171 last = last + delta;
7172 }
7173 end = ctxt->input->end;
7174 }
7175 }
7176 if (*in != limit) goto need_complex;
7177 } else {
7178 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7179 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7180 in++;
7181 if (in >= end) {
7182 const xmlChar *oldbase = ctxt->input->base;
7183 GROW;
7184 if (oldbase != ctxt->input->base) {
7185 long delta = ctxt->input->base - oldbase;
7186 start = start + delta;
7187 in = in + delta;
7188 }
7189 end = ctxt->input->end;
7190 }
7191 }
7192 last = in;
7193 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007194 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007195 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007196 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007197 *len = last - start;
7198 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007199 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007200 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007201 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007202 }
7203 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007204 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007205 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007206need_complex:
7207 if (alloc) *alloc = 1;
7208 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007209}
7210
7211/**
7212 * xmlParseAttribute2:
7213 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007214 * @pref: the element prefix
7215 * @elem: the element name
7216 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007217 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007218 * @len: an int * to save the length of the attribute
7219 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007220 *
7221 * parse an attribute in the new SAX2 framework.
7222 *
7223 * Returns the attribute name, and the value in *value, .
7224 */
7225
7226static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007227xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7228 const xmlChar *pref, const xmlChar *elem,
7229 const xmlChar **prefix, xmlChar **value,
7230 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007231 const xmlChar *name;
7232 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007233 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007234
7235 *value = NULL;
7236 GROW;
7237 name = xmlParseQName(ctxt, prefix);
7238 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007239 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7240 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007241 return(NULL);
7242 }
7243
7244 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007245 * get the type if needed
7246 */
7247 if (ctxt->attsSpecial != NULL) {
7248 int type;
7249
7250 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7251 pref, elem, *prefix, name);
7252 if (type != 0) normalize = 1;
7253 }
7254
7255 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007256 * read the value
7257 */
7258 SKIP_BLANKS;
7259 if (RAW == '=') {
7260 NEXT;
7261 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007262 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007263 ctxt->instate = XML_PARSER_CONTENT;
7264 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007265 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007266 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007267 return(NULL);
7268 }
7269
7270 /*
7271 * Check that xml:lang conforms to the specification
7272 * No more registered as an error, just generate a warning now
7273 * since this was deprecated in XML second edition
7274 */
7275 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7276 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007277 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7278 "Malformed value for xml:lang : %s\n",
7279 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007280 }
7281 }
7282
7283 /*
7284 * Check that xml:space conforms to the specification
7285 */
7286 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7287 if (xmlStrEqual(val, BAD_CAST "default"))
7288 *(ctxt->space) = 0;
7289 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7290 *(ctxt->space) = 1;
7291 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007292 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007293"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7294 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007295 }
7296 }
7297
7298 *value = val;
7299 return(name);
7300}
7301
7302/**
7303 * xmlParseStartTag2:
7304 * @ctxt: an XML parser context
7305 *
7306 * parse a start of tag either for rule element or
7307 * EmptyElement. In both case we don't parse the tag closing chars.
7308 * This routine is called when running SAX2 parsing
7309 *
7310 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7311 *
7312 * [ WFC: Unique Att Spec ]
7313 * No attribute name may appear more than once in the same start-tag or
7314 * empty-element tag.
7315 *
7316 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7317 *
7318 * [ WFC: Unique Att Spec ]
7319 * No attribute name may appear more than once in the same start-tag or
7320 * empty-element tag.
7321 *
7322 * With namespace:
7323 *
7324 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7325 *
7326 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7327 *
7328 * Returns the element name parsed
7329 */
7330
7331static const xmlChar *
7332xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007333 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007334 const xmlChar *localname;
7335 const xmlChar *prefix;
7336 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007337 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007338 const xmlChar *nsname;
7339 xmlChar *attvalue;
7340 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007341 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007342 int nratts, nbatts, nbdef;
7343 int i, j, nbNs, attval;
7344 const xmlChar *base;
7345 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007346
7347 if (RAW != '<') return(NULL);
7348 NEXT1;
7349
7350 /*
7351 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7352 * point since the attribute values may be stored as pointers to
7353 * the buffer and calling SHRINK would destroy them !
7354 * The Shrinking is only possible once the full set of attribute
7355 * callbacks have been done.
7356 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007357reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007358 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007359 base = ctxt->input->base;
7360 cur = ctxt->input->cur - ctxt->input->base;
7361 nbatts = 0;
7362 nratts = 0;
7363 nbdef = 0;
7364 nbNs = 0;
7365 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007366
7367 localname = xmlParseQName(ctxt, &prefix);
7368 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007369 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7370 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007371 return(NULL);
7372 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007373 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007374
7375 /*
7376 * Now parse the attributes, it ends up with the ending
7377 *
7378 * (S Attribute)* S?
7379 */
7380 SKIP_BLANKS;
7381 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007382 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007383
7384 while ((RAW != '>') &&
7385 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007386 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387 const xmlChar *q = CUR_PTR;
7388 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007389 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007390
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007391 attname = xmlParseAttribute2(ctxt, prefix, localname,
7392 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007393 if ((attname != NULL) && (attvalue != NULL)) {
7394 if (len < 0) len = xmlStrlen(attvalue);
7395 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007396 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7397 xmlURIPtr uri;
7398
7399 if (*URL != 0) {
7400 uri = xmlParseURI((const char *) URL);
7401 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007402 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7403 "xmlns: %s not a valid URI\n",
7404 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007405 } else {
7406 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007407 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7408 "xmlns: URI %s is not absolute\n",
7409 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007410 }
7411 xmlFreeURI(uri);
7412 }
7413 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007414 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007415 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007416 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007417 for (j = 1;j <= nbNs;j++)
7418 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7419 break;
7420 if (j <= nbNs)
7421 xmlErrAttributeDup(ctxt, NULL, attname);
7422 else
7423 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007424 if (alloc != 0) xmlFree(attvalue);
7425 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007426 continue;
7427 }
7428 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007429 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7430 xmlURIPtr uri;
7431
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007432 if (attname == ctxt->str_xml) {
7433 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007434 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7435 "xml namespace prefix mapped to wrong URI\n",
7436 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007437 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007438 /*
7439 * Do not keep a namespace definition node
7440 */
7441 if (alloc != 0) xmlFree(attvalue);
7442 SKIP_BLANKS;
7443 continue;
7444 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007445 uri = xmlParseURI((const char *) URL);
7446 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007447 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7448 "xmlns:%s: '%s' is not a valid URI\n",
7449 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007450 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007451 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007452 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7453 "xmlns:%s: URI %s is not absolute\n",
7454 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007455 }
7456 xmlFreeURI(uri);
7457 }
7458
Daniel Veillard0fb18932003-09-07 09:14:37 +00007459 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007460 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007461 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007462 for (j = 1;j <= nbNs;j++)
7463 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7464 break;
7465 if (j <= nbNs)
7466 xmlErrAttributeDup(ctxt, aprefix, attname);
7467 else
7468 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007469 if (alloc != 0) xmlFree(attvalue);
7470 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007471 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007472 continue;
7473 }
7474
7475 /*
7476 * Add the pair to atts
7477 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007478 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7479 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007480 if (attvalue[len] == 0)
7481 xmlFree(attvalue);
7482 goto failed;
7483 }
7484 maxatts = ctxt->maxatts;
7485 atts = ctxt->atts;
7486 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007487 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007488 atts[nbatts++] = attname;
7489 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007490 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007491 atts[nbatts++] = attvalue;
7492 attvalue += len;
7493 atts[nbatts++] = attvalue;
7494 /*
7495 * tag if some deallocation is needed
7496 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007497 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007498 } else {
7499 if ((attvalue != NULL) && (attvalue[len] == 0))
7500 xmlFree(attvalue);
7501 }
7502
7503failed:
7504
7505 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007506 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007507 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7508 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007509 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007510 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7511 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007512 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007513 }
7514 SKIP_BLANKS;
7515 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7516 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007517 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007518 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007519 break;
7520 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007522 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523 }
7524
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007526 * The attributes defaulting
7527 */
7528 if (ctxt->attsDefault != NULL) {
7529 xmlDefAttrsPtr defaults;
7530
7531 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7532 if (defaults != NULL) {
7533 for (i = 0;i < defaults->nbAttrs;i++) {
7534 attname = defaults->values[4 * i];
7535 aprefix = defaults->values[4 * i + 1];
7536
7537 /*
7538 * special work for namespaces defaulted defs
7539 */
7540 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7541 /*
7542 * check that it's not a defined namespace
7543 */
7544 for (j = 1;j <= nbNs;j++)
7545 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7546 break;
7547 if (j <= nbNs) continue;
7548
7549 nsname = xmlGetNamespace(ctxt, NULL);
7550 if (nsname != defaults->values[4 * i + 2]) {
7551 if (nsPush(ctxt, NULL,
7552 defaults->values[4 * i + 2]) > 0)
7553 nbNs++;
7554 }
7555 } else if (aprefix == ctxt->str_xmlns) {
7556 /*
7557 * check that it's not a defined namespace
7558 */
7559 for (j = 1;j <= nbNs;j++)
7560 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7561 break;
7562 if (j <= nbNs) continue;
7563
7564 nsname = xmlGetNamespace(ctxt, attname);
7565 if (nsname != defaults->values[2]) {
7566 if (nsPush(ctxt, attname,
7567 defaults->values[4 * i + 2]) > 0)
7568 nbNs++;
7569 }
7570 } else {
7571 /*
7572 * check that it's not a defined attribute
7573 */
7574 for (j = 0;j < nbatts;j+=5) {
7575 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7576 break;
7577 }
7578 if (j < nbatts) continue;
7579
7580 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7581 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007582 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007583 }
7584 maxatts = ctxt->maxatts;
7585 atts = ctxt->atts;
7586 }
7587 atts[nbatts++] = attname;
7588 atts[nbatts++] = aprefix;
7589 if (aprefix == NULL)
7590 atts[nbatts++] = NULL;
7591 else
7592 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7593 atts[nbatts++] = defaults->values[4 * i + 2];
7594 atts[nbatts++] = defaults->values[4 * i + 3];
7595 nbdef++;
7596 }
7597 }
7598 }
7599 }
7600
Daniel Veillarde70c8772003-11-25 07:21:18 +00007601 /*
7602 * The attributes checkings
7603 */
7604 for (i = 0; i < nbatts;i += 5) {
7605 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7606 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7607 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7608 "Namespace prefix %s for %s on %s is not defined\n",
7609 atts[i + 1], atts[i], localname);
7610 }
7611 atts[i + 2] = nsname;
7612 /*
7613 * [ WFC: Unique Att Spec ]
7614 * No attribute name may appear more than once in the same
7615 * start-tag or empty-element tag.
7616 * As extended by the Namespace in XML REC.
7617 */
7618 for (j = 0; j < i;j += 5) {
7619 if (atts[i] == atts[j]) {
7620 if (atts[i+1] == atts[j+1]) {
7621 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7622 break;
7623 }
7624 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7625 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7626 "Namespaced Attribute %s in '%s' redefined\n",
7627 atts[i], nsname, NULL);
7628 break;
7629 }
7630 }
7631 }
7632 }
7633
Daniel Veillarde57ec792003-09-10 10:50:59 +00007634 nsname = xmlGetNamespace(ctxt, prefix);
7635 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007636 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7637 "Namespace prefix %s on %s is not defined\n",
7638 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007639 }
7640 *pref = prefix;
7641 *URI = nsname;
7642
7643 /*
7644 * SAX: Start of Element !
7645 */
7646 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7647 (!ctxt->disableSAX)) {
7648 if (nbNs > 0)
7649 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7650 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7651 nbatts / 5, nbdef, atts);
7652 else
7653 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7654 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7655 }
7656
7657 /*
7658 * Free up attribute allocated strings if needed
7659 */
7660 if (attval != 0) {
7661 for (i = 3,j = 0; j < nratts;i += 5,j++)
7662 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7663 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007664 }
7665
7666 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007667
7668base_changed:
7669 /*
7670 * the attribute strings are valid iif the base didn't changed
7671 */
7672 if (attval != 0) {
7673 for (i = 3,j = 0; j < nratts;i += 5,j++)
7674 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7675 xmlFree((xmlChar *) atts[i]);
7676 }
7677 ctxt->input->cur = ctxt->input->base + cur;
7678 if (ctxt->wellFormed == 1) {
7679 goto reparse;
7680 }
7681 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007682}
7683
7684/**
7685 * xmlParseEndTag2:
7686 * @ctxt: an XML parser context
7687 * @line: line of the start tag
7688 * @nsNr: number of namespaces on the start tag
7689 *
7690 * parse an end of tag
7691 *
7692 * [42] ETag ::= '</' Name S? '>'
7693 *
7694 * With namespace
7695 *
7696 * [NS 9] ETag ::= '</' QName S? '>'
7697 */
7698
7699static void
7700xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007701 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 const xmlChar *name;
7703
7704 GROW;
7705 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007706 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007707 return;
7708 }
7709 SKIP(2);
7710
William M. Brack13dfa872004-09-18 04:52:08 +00007711 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007712 if (ctxt->input->cur[tlen] == '>') {
7713 ctxt->input->cur += tlen + 1;
7714 goto done;
7715 }
7716 ctxt->input->cur += tlen;
7717 name = (xmlChar*)1;
7718 } else {
7719 if (prefix == NULL)
7720 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7721 else
7722 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7723 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007724
7725 /*
7726 * We should definitely be at the ending "S? '>'" part
7727 */
7728 GROW;
7729 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007730 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007731 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007732 } else
7733 NEXT1;
7734
7735 /*
7736 * [ WFC: Element Type Match ]
7737 * The Name in an element's end-tag must match the element type in the
7738 * start-tag.
7739 *
7740 */
7741 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007742 if (name == NULL) name = BAD_CAST "unparseable";
7743 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007744 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007745 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007746 }
7747
7748 /*
7749 * SAX: End of Tag
7750 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007751done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007752 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7753 (!ctxt->disableSAX))
7754 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7755
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 spacePop(ctxt);
7757 if (nsNr != 0)
7758 nsPop(ctxt, nsNr);
7759 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007760}
7761
7762/**
Owen Taylor3473f882001-02-23 17:55:21 +00007763 * xmlParseCDSect:
7764 * @ctxt: an XML parser context
7765 *
7766 * Parse escaped pure raw content.
7767 *
7768 * [18] CDSect ::= CDStart CData CDEnd
7769 *
7770 * [19] CDStart ::= '<![CDATA['
7771 *
7772 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7773 *
7774 * [21] CDEnd ::= ']]>'
7775 */
7776void
7777xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7778 xmlChar *buf = NULL;
7779 int len = 0;
7780 int size = XML_PARSER_BUFFER_SIZE;
7781 int r, rl;
7782 int s, sl;
7783 int cur, l;
7784 int count = 0;
7785
Daniel Veillard8f597c32003-10-06 08:19:27 +00007786 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007787 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007788 SKIP(9);
7789 } else
7790 return;
7791
7792 ctxt->instate = XML_PARSER_CDATA_SECTION;
7793 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007794 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007795 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007796 ctxt->instate = XML_PARSER_CONTENT;
7797 return;
7798 }
7799 NEXTL(rl);
7800 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007801 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007802 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007803 ctxt->instate = XML_PARSER_CONTENT;
7804 return;
7805 }
7806 NEXTL(sl);
7807 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007808 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007809 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007811 return;
7812 }
William M. Brack871611b2003-10-18 04:53:14 +00007813 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007814 ((r != ']') || (s != ']') || (cur != '>'))) {
7815 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007816 xmlChar *tmp;
7817
Owen Taylor3473f882001-02-23 17:55:21 +00007818 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00007819 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7820 if (tmp == NULL) {
7821 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007822 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007823 return;
7824 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00007825 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00007826 }
7827 COPY_BUF(rl,buf,len,r);
7828 r = s;
7829 rl = sl;
7830 s = cur;
7831 sl = l;
7832 count++;
7833 if (count > 50) {
7834 GROW;
7835 count = 0;
7836 }
7837 NEXTL(l);
7838 cur = CUR_CHAR(l);
7839 }
7840 buf[len] = 0;
7841 ctxt->instate = XML_PARSER_CONTENT;
7842 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007843 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007844 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007845 xmlFree(buf);
7846 return;
7847 }
7848 NEXTL(l);
7849
7850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007851 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007852 */
7853 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7854 if (ctxt->sax->cdataBlock != NULL)
7855 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007856 else if (ctxt->sax->characters != NULL)
7857 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007858 }
7859 xmlFree(buf);
7860}
7861
7862/**
7863 * xmlParseContent:
7864 * @ctxt: an XML parser context
7865 *
7866 * Parse a content:
7867 *
7868 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7869 */
7870
7871void
7872xmlParseContent(xmlParserCtxtPtr ctxt) {
7873 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007874 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007875 ((RAW != '<') || (NXT(1) != '/'))) {
7876 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007877 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007878 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007879
7880 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007881 * First case : a Processing Instruction.
7882 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007883 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007884 xmlParsePI(ctxt);
7885 }
7886
7887 /*
7888 * Second case : a CDSection
7889 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007890 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007891 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007892 xmlParseCDSect(ctxt);
7893 }
7894
7895 /*
7896 * Third case : a comment
7897 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007898 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007899 (NXT(2) == '-') && (NXT(3) == '-')) {
7900 xmlParseComment(ctxt);
7901 ctxt->instate = XML_PARSER_CONTENT;
7902 }
7903
7904 /*
7905 * Fourth case : a sub-element.
7906 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007907 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007908 xmlParseElement(ctxt);
7909 }
7910
7911 /*
7912 * Fifth case : a reference. If if has not been resolved,
7913 * parsing returns it's Name, create the node
7914 */
7915
Daniel Veillard21a0f912001-02-25 19:54:14 +00007916 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007917 xmlParseReference(ctxt);
7918 }
7919
7920 /*
7921 * Last case, text. Note that References are handled directly.
7922 */
7923 else {
7924 xmlParseCharData(ctxt, 0);
7925 }
7926
7927 GROW;
7928 /*
7929 * Pop-up of finished entities.
7930 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007931 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007932 xmlPopInput(ctxt);
7933 SHRINK;
7934
Daniel Veillardfdc91562002-07-01 21:52:03 +00007935 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007936 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7937 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007938 ctxt->instate = XML_PARSER_EOF;
7939 break;
7940 }
7941 }
7942}
7943
7944/**
7945 * xmlParseElement:
7946 * @ctxt: an XML parser context
7947 *
7948 * parse an XML element, this is highly recursive
7949 *
7950 * [39] element ::= EmptyElemTag | STag content ETag
7951 *
7952 * [ WFC: Element Type Match ]
7953 * The Name in an element's end-tag must match the element type in the
7954 * start-tag.
7955 *
Owen Taylor3473f882001-02-23 17:55:21 +00007956 */
7957
7958void
7959xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007960 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007961 const xmlChar *prefix;
7962 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007963 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007964 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007965 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007967
7968 /* Capture start position */
7969 if (ctxt->record_info) {
7970 node_info.begin_pos = ctxt->input->consumed +
7971 (CUR_PTR - ctxt->input->base);
7972 node_info.begin_line = ctxt->input->line;
7973 }
7974
7975 if (ctxt->spaceNr == 0)
7976 spacePush(ctxt, -1);
7977 else
7978 spacePush(ctxt, *ctxt->space);
7979
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007980 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007981#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007982 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007983#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007984 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007985#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007986 else
7987 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007988#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007989 if (name == NULL) {
7990 spacePop(ctxt);
7991 return;
7992 }
7993 namePush(ctxt, name);
7994 ret = ctxt->node;
7995
Daniel Veillard4432df22003-09-28 18:58:27 +00007996#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007997 /*
7998 * [ VC: Root Element Type ]
7999 * The Name in the document type declaration must match the element
8000 * type of the root element.
8001 */
8002 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8003 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8004 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008005#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008006
8007 /*
8008 * Check for an Empty Element.
8009 */
8010 if ((RAW == '/') && (NXT(1) == '>')) {
8011 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008012 if (ctxt->sax2) {
8013 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8014 (!ctxt->disableSAX))
8015 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008016#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008017 } else {
8018 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8019 (!ctxt->disableSAX))
8020 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008021#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008022 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008023 namePop(ctxt);
8024 spacePop(ctxt);
8025 if (nsNr != ctxt->nsNr)
8026 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008027 if ( ret != NULL && ctxt->record_info ) {
8028 node_info.end_pos = ctxt->input->consumed +
8029 (CUR_PTR - ctxt->input->base);
8030 node_info.end_line = ctxt->input->line;
8031 node_info.node = ret;
8032 xmlParserAddNodeInfo(ctxt, &node_info);
8033 }
8034 return;
8035 }
8036 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008037 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008038 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008039 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8040 "Couldn't find end of Start Tag %s line %d\n",
8041 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008042
8043 /*
8044 * end of parsing of this node.
8045 */
8046 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008047 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008048 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008049 if (nsNr != ctxt->nsNr)
8050 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008051
8052 /*
8053 * Capture end position and add node
8054 */
8055 if ( ret != NULL && ctxt->record_info ) {
8056 node_info.end_pos = ctxt->input->consumed +
8057 (CUR_PTR - ctxt->input->base);
8058 node_info.end_line = ctxt->input->line;
8059 node_info.node = ret;
8060 xmlParserAddNodeInfo(ctxt, &node_info);
8061 }
8062 return;
8063 }
8064
8065 /*
8066 * Parse the content of the element:
8067 */
8068 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008069 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008070 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008071 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008072 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008073
8074 /*
8075 * end of parsing of this node.
8076 */
8077 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008078 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008079 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008080 if (nsNr != ctxt->nsNr)
8081 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008082 return;
8083 }
8084
8085 /*
8086 * parse the end of tag: '</' should be here.
8087 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008088 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008089 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008090 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008091 }
8092#ifdef LIBXML_SAX1_ENABLED
8093 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008094 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008095#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008096
8097 /*
8098 * Capture end position and add node
8099 */
8100 if ( ret != NULL && ctxt->record_info ) {
8101 node_info.end_pos = ctxt->input->consumed +
8102 (CUR_PTR - ctxt->input->base);
8103 node_info.end_line = ctxt->input->line;
8104 node_info.node = ret;
8105 xmlParserAddNodeInfo(ctxt, &node_info);
8106 }
8107}
8108
8109/**
8110 * xmlParseVersionNum:
8111 * @ctxt: an XML parser context
8112 *
8113 * parse the XML version value.
8114 *
8115 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8116 *
8117 * Returns the string giving the XML version number, or NULL
8118 */
8119xmlChar *
8120xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8121 xmlChar *buf = NULL;
8122 int len = 0;
8123 int size = 10;
8124 xmlChar cur;
8125
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008126 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008127 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008128 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008129 return(NULL);
8130 }
8131 cur = CUR;
8132 while (((cur >= 'a') && (cur <= 'z')) ||
8133 ((cur >= 'A') && (cur <= 'Z')) ||
8134 ((cur >= '0') && (cur <= '9')) ||
8135 (cur == '_') || (cur == '.') ||
8136 (cur == ':') || (cur == '-')) {
8137 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008138 xmlChar *tmp;
8139
Owen Taylor3473f882001-02-23 17:55:21 +00008140 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008141 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8142 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008143 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008144 return(NULL);
8145 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008146 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008147 }
8148 buf[len++] = cur;
8149 NEXT;
8150 cur=CUR;
8151 }
8152 buf[len] = 0;
8153 return(buf);
8154}
8155
8156/**
8157 * xmlParseVersionInfo:
8158 * @ctxt: an XML parser context
8159 *
8160 * parse the XML version.
8161 *
8162 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8163 *
8164 * [25] Eq ::= S? '=' S?
8165 *
8166 * Returns the version string, e.g. "1.0"
8167 */
8168
8169xmlChar *
8170xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8171 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008172
Daniel Veillarda07050d2003-10-19 14:46:32 +00008173 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008174 SKIP(7);
8175 SKIP_BLANKS;
8176 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008177 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008178 return(NULL);
8179 }
8180 NEXT;
8181 SKIP_BLANKS;
8182 if (RAW == '"') {
8183 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008184 version = xmlParseVersionNum(ctxt);
8185 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008186 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008187 } else
8188 NEXT;
8189 } else if (RAW == '\''){
8190 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008191 version = xmlParseVersionNum(ctxt);
8192 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008193 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008194 } else
8195 NEXT;
8196 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008197 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008198 }
8199 }
8200 return(version);
8201}
8202
8203/**
8204 * xmlParseEncName:
8205 * @ctxt: an XML parser context
8206 *
8207 * parse the XML encoding name
8208 *
8209 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8210 *
8211 * Returns the encoding name value or NULL
8212 */
8213xmlChar *
8214xmlParseEncName(xmlParserCtxtPtr ctxt) {
8215 xmlChar *buf = NULL;
8216 int len = 0;
8217 int size = 10;
8218 xmlChar cur;
8219
8220 cur = CUR;
8221 if (((cur >= 'a') && (cur <= 'z')) ||
8222 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008223 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008224 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008225 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008226 return(NULL);
8227 }
8228
8229 buf[len++] = cur;
8230 NEXT;
8231 cur = CUR;
8232 while (((cur >= 'a') && (cur <= 'z')) ||
8233 ((cur >= 'A') && (cur <= 'Z')) ||
8234 ((cur >= '0') && (cur <= '9')) ||
8235 (cur == '.') || (cur == '_') ||
8236 (cur == '-')) {
8237 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008238 xmlChar *tmp;
8239
Owen Taylor3473f882001-02-23 17:55:21 +00008240 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008241 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8242 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008243 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008244 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008245 return(NULL);
8246 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008247 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008248 }
8249 buf[len++] = cur;
8250 NEXT;
8251 cur = CUR;
8252 if (cur == 0) {
8253 SHRINK;
8254 GROW;
8255 cur = CUR;
8256 }
8257 }
8258 buf[len] = 0;
8259 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008260 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008261 }
8262 return(buf);
8263}
8264
8265/**
8266 * xmlParseEncodingDecl:
8267 * @ctxt: an XML parser context
8268 *
8269 * parse the XML encoding declaration
8270 *
8271 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8272 *
8273 * this setups the conversion filters.
8274 *
8275 * Returns the encoding value or NULL
8276 */
8277
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008278const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008279xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8280 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008281
8282 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008283 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008284 SKIP(8);
8285 SKIP_BLANKS;
8286 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008287 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008288 return(NULL);
8289 }
8290 NEXT;
8291 SKIP_BLANKS;
8292 if (RAW == '"') {
8293 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008294 encoding = xmlParseEncName(ctxt);
8295 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008296 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008297 } else
8298 NEXT;
8299 } else if (RAW == '\''){
8300 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008301 encoding = xmlParseEncName(ctxt);
8302 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008303 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008304 } else
8305 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008306 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008307 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008308 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008309 /*
8310 * UTF-16 encoding stwich has already taken place at this stage,
8311 * more over the little-endian/big-endian selection is already done
8312 */
8313 if ((encoding != NULL) &&
8314 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8315 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008316 if (ctxt->encoding != NULL)
8317 xmlFree((xmlChar *) ctxt->encoding);
8318 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008319 }
8320 /*
8321 * UTF-8 encoding is handled natively
8322 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008323 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008324 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8325 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008326 if (ctxt->encoding != NULL)
8327 xmlFree((xmlChar *) ctxt->encoding);
8328 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008329 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008330 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008331 xmlCharEncodingHandlerPtr handler;
8332
8333 if (ctxt->input->encoding != NULL)
8334 xmlFree((xmlChar *) ctxt->input->encoding);
8335 ctxt->input->encoding = encoding;
8336
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008337 handler = xmlFindCharEncodingHandler((const char *) encoding);
8338 if (handler != NULL) {
8339 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008340 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008341 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008342 "Unsupported encoding %s\n", encoding);
8343 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008344 }
8345 }
8346 }
8347 return(encoding);
8348}
8349
8350/**
8351 * xmlParseSDDecl:
8352 * @ctxt: an XML parser context
8353 *
8354 * parse the XML standalone declaration
8355 *
8356 * [32] SDDecl ::= S 'standalone' Eq
8357 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8358 *
8359 * [ VC: Standalone Document Declaration ]
8360 * TODO The standalone document declaration must have the value "no"
8361 * if any external markup declarations contain declarations of:
8362 * - attributes with default values, if elements to which these
8363 * attributes apply appear in the document without specifications
8364 * of values for these attributes, or
8365 * - entities (other than amp, lt, gt, apos, quot), if references
8366 * to those entities appear in the document, or
8367 * - attributes with values subject to normalization, where the
8368 * attribute appears in the document with a value which will change
8369 * as a result of normalization, or
8370 * - element types with element content, if white space occurs directly
8371 * within any instance of those types.
8372 *
8373 * Returns 1 if standalone, 0 otherwise
8374 */
8375
8376int
8377xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8378 int standalone = -1;
8379
8380 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008381 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008382 SKIP(10);
8383 SKIP_BLANKS;
8384 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008385 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008386 return(standalone);
8387 }
8388 NEXT;
8389 SKIP_BLANKS;
8390 if (RAW == '\''){
8391 NEXT;
8392 if ((RAW == 'n') && (NXT(1) == 'o')) {
8393 standalone = 0;
8394 SKIP(2);
8395 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8396 (NXT(2) == 's')) {
8397 standalone = 1;
8398 SKIP(3);
8399 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008400 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 }
8402 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008403 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008404 } else
8405 NEXT;
8406 } else if (RAW == '"'){
8407 NEXT;
8408 if ((RAW == 'n') && (NXT(1) == 'o')) {
8409 standalone = 0;
8410 SKIP(2);
8411 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8412 (NXT(2) == 's')) {
8413 standalone = 1;
8414 SKIP(3);
8415 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008416 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 }
8418 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008419 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008420 } else
8421 NEXT;
8422 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008423 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008424 }
8425 }
8426 return(standalone);
8427}
8428
8429/**
8430 * xmlParseXMLDecl:
8431 * @ctxt: an XML parser context
8432 *
8433 * parse an XML declaration header
8434 *
8435 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8436 */
8437
8438void
8439xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8440 xmlChar *version;
8441
8442 /*
8443 * We know that '<?xml' is here.
8444 */
8445 SKIP(5);
8446
William M. Brack76e95df2003-10-18 16:20:14 +00008447 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8449 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008450 }
8451 SKIP_BLANKS;
8452
8453 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008454 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008455 */
8456 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008457 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008458 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008459 } else {
8460 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8461 /*
8462 * TODO: Blueberry should be detected here
8463 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008464 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8465 "Unsupported version '%s'\n",
8466 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008467 }
8468 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008469 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008470 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008471 }
Owen Taylor3473f882001-02-23 17:55:21 +00008472
8473 /*
8474 * We may have the encoding declaration
8475 */
William M. Brack76e95df2003-10-18 16:20:14 +00008476 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008477 if ((RAW == '?') && (NXT(1) == '>')) {
8478 SKIP(2);
8479 return;
8480 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008481 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008482 }
8483 xmlParseEncodingDecl(ctxt);
8484 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8485 /*
8486 * The XML REC instructs us to stop parsing right here
8487 */
8488 return;
8489 }
8490
8491 /*
8492 * We may have the standalone status.
8493 */
William M. Brack76e95df2003-10-18 16:20:14 +00008494 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008495 if ((RAW == '?') && (NXT(1) == '>')) {
8496 SKIP(2);
8497 return;
8498 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008499 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008500 }
8501 SKIP_BLANKS;
8502 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8503
8504 SKIP_BLANKS;
8505 if ((RAW == '?') && (NXT(1) == '>')) {
8506 SKIP(2);
8507 } else if (RAW == '>') {
8508 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008509 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008510 NEXT;
8511 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008512 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008513 MOVETO_ENDTAG(CUR_PTR);
8514 NEXT;
8515 }
8516}
8517
8518/**
8519 * xmlParseMisc:
8520 * @ctxt: an XML parser context
8521 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008522 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008523 *
8524 * [27] Misc ::= Comment | PI | S
8525 */
8526
8527void
8528xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008529 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008530 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008531 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008532 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008533 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008534 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008535 NEXT;
8536 } else
8537 xmlParseComment(ctxt);
8538 }
8539}
8540
8541/**
8542 * xmlParseDocument:
8543 * @ctxt: an XML parser context
8544 *
8545 * parse an XML document (and build a tree if using the standard SAX
8546 * interface).
8547 *
8548 * [1] document ::= prolog element Misc*
8549 *
8550 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8551 *
8552 * Returns 0, -1 in case of error. the parser context is augmented
8553 * as a result of the parsing.
8554 */
8555
8556int
8557xmlParseDocument(xmlParserCtxtPtr ctxt) {
8558 xmlChar start[4];
8559 xmlCharEncoding enc;
8560
8561 xmlInitParser();
8562
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008563 if ((ctxt == NULL) || (ctxt->input == NULL))
8564 return(-1);
8565
Owen Taylor3473f882001-02-23 17:55:21 +00008566 GROW;
8567
8568 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569 * SAX: detecting the level.
8570 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008571 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008572
8573 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008574 * SAX: beginning of the document processing.
8575 */
8576 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8577 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8578
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008579 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8580 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008581 /*
8582 * Get the 4 first bytes and decode the charset
8583 * if enc != XML_CHAR_ENCODING_NONE
8584 * plug some encoding conversion routines.
8585 */
8586 start[0] = RAW;
8587 start[1] = NXT(1);
8588 start[2] = NXT(2);
8589 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008590 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008591 if (enc != XML_CHAR_ENCODING_NONE) {
8592 xmlSwitchEncoding(ctxt, enc);
8593 }
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
8595
8596
8597 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 }
8600
8601 /*
8602 * Check for the XMLDecl in the Prolog.
8603 */
8604 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008605 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008606
8607 /*
8608 * Note that we will switch encoding on the fly.
8609 */
8610 xmlParseXMLDecl(ctxt);
8611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8612 /*
8613 * The XML REC instructs us to stop parsing right here
8614 */
8615 return(-1);
8616 }
8617 ctxt->standalone = ctxt->input->standalone;
8618 SKIP_BLANKS;
8619 } else {
8620 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8621 }
8622 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8623 ctxt->sax->startDocument(ctxt->userData);
8624
8625 /*
8626 * The Misc part of the Prolog
8627 */
8628 GROW;
8629 xmlParseMisc(ctxt);
8630
8631 /*
8632 * Then possibly doc type declaration(s) and more Misc
8633 * (doctypedecl Misc*)?
8634 */
8635 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008636 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008637
8638 ctxt->inSubset = 1;
8639 xmlParseDocTypeDecl(ctxt);
8640 if (RAW == '[') {
8641 ctxt->instate = XML_PARSER_DTD;
8642 xmlParseInternalSubset(ctxt);
8643 }
8644
8645 /*
8646 * Create and update the external subset.
8647 */
8648 ctxt->inSubset = 2;
8649 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8650 (!ctxt->disableSAX))
8651 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8652 ctxt->extSubSystem, ctxt->extSubURI);
8653 ctxt->inSubset = 0;
8654
8655
8656 ctxt->instate = XML_PARSER_PROLOG;
8657 xmlParseMisc(ctxt);
8658 }
8659
8660 /*
8661 * Time to start parsing the tree itself
8662 */
8663 GROW;
8664 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008665 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8666 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008667 } else {
8668 ctxt->instate = XML_PARSER_CONTENT;
8669 xmlParseElement(ctxt);
8670 ctxt->instate = XML_PARSER_EPILOG;
8671
8672
8673 /*
8674 * The Misc part at the end
8675 */
8676 xmlParseMisc(ctxt);
8677
Daniel Veillard561b7f82002-03-20 21:55:57 +00008678 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008679 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008680 }
8681 ctxt->instate = XML_PARSER_EOF;
8682 }
8683
8684 /*
8685 * SAX: end of the document processing.
8686 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008687 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008688 ctxt->sax->endDocument(ctxt->userData);
8689
Daniel Veillard5997aca2002-03-18 18:36:20 +00008690 /*
8691 * Remove locally kept entity definitions if the tree was not built
8692 */
8693 if ((ctxt->myDoc != NULL) &&
8694 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8695 xmlFreeDoc(ctxt->myDoc);
8696 ctxt->myDoc = NULL;
8697 }
8698
Daniel Veillardc7612992002-02-17 22:47:37 +00008699 if (! ctxt->wellFormed) {
8700 ctxt->valid = 0;
8701 return(-1);
8702 }
Owen Taylor3473f882001-02-23 17:55:21 +00008703 return(0);
8704}
8705
8706/**
8707 * xmlParseExtParsedEnt:
8708 * @ctxt: an XML parser context
8709 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008710 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008711 * An external general parsed entity is well-formed if it matches the
8712 * production labeled extParsedEnt.
8713 *
8714 * [78] extParsedEnt ::= TextDecl? content
8715 *
8716 * Returns 0, -1 in case of error. the parser context is augmented
8717 * as a result of the parsing.
8718 */
8719
8720int
8721xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8722 xmlChar start[4];
8723 xmlCharEncoding enc;
8724
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008725 if ((ctxt == NULL) || (ctxt->input == NULL))
8726 return(-1);
8727
Owen Taylor3473f882001-02-23 17:55:21 +00008728 xmlDefaultSAXHandlerInit();
8729
Daniel Veillard309f81d2003-09-23 09:02:53 +00008730 xmlDetectSAX2(ctxt);
8731
Owen Taylor3473f882001-02-23 17:55:21 +00008732 GROW;
8733
8734 /*
8735 * SAX: beginning of the document processing.
8736 */
8737 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8738 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8739
8740 /*
8741 * Get the 4 first bytes and decode the charset
8742 * if enc != XML_CHAR_ENCODING_NONE
8743 * plug some encoding conversion routines.
8744 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008745 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8746 start[0] = RAW;
8747 start[1] = NXT(1);
8748 start[2] = NXT(2);
8749 start[3] = NXT(3);
8750 enc = xmlDetectCharEncoding(start, 4);
8751 if (enc != XML_CHAR_ENCODING_NONE) {
8752 xmlSwitchEncoding(ctxt, enc);
8753 }
Owen Taylor3473f882001-02-23 17:55:21 +00008754 }
8755
8756
8757 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008758 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008759 }
8760
8761 /*
8762 * Check for the XMLDecl in the Prolog.
8763 */
8764 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008765 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008766
8767 /*
8768 * Note that we will switch encoding on the fly.
8769 */
8770 xmlParseXMLDecl(ctxt);
8771 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8772 /*
8773 * The XML REC instructs us to stop parsing right here
8774 */
8775 return(-1);
8776 }
8777 SKIP_BLANKS;
8778 } else {
8779 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8780 }
8781 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8782 ctxt->sax->startDocument(ctxt->userData);
8783
8784 /*
8785 * Doing validity checking on chunk doesn't make sense
8786 */
8787 ctxt->instate = XML_PARSER_CONTENT;
8788 ctxt->validate = 0;
8789 ctxt->loadsubset = 0;
8790 ctxt->depth = 0;
8791
8792 xmlParseContent(ctxt);
8793
8794 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008795 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008796 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008797 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008798 }
8799
8800 /*
8801 * SAX: end of the document processing.
8802 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008803 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008804 ctxt->sax->endDocument(ctxt->userData);
8805
8806 if (! ctxt->wellFormed) return(-1);
8807 return(0);
8808}
8809
Daniel Veillard73b013f2003-09-30 12:36:01 +00008810#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008811/************************************************************************
8812 * *
8813 * Progressive parsing interfaces *
8814 * *
8815 ************************************************************************/
8816
8817/**
8818 * xmlParseLookupSequence:
8819 * @ctxt: an XML parser context
8820 * @first: the first char to lookup
8821 * @next: the next char to lookup or zero
8822 * @third: the next char to lookup or zero
8823 *
8824 * Try to find if a sequence (first, next, third) or just (first next) or
8825 * (first) is available in the input stream.
8826 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8827 * to avoid rescanning sequences of bytes, it DOES change the state of the
8828 * parser, do not use liberally.
8829 *
8830 * Returns the index to the current parsing point if the full sequence
8831 * is available, -1 otherwise.
8832 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008833static int
Owen Taylor3473f882001-02-23 17:55:21 +00008834xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8835 xmlChar next, xmlChar third) {
8836 int base, len;
8837 xmlParserInputPtr in;
8838 const xmlChar *buf;
8839
8840 in = ctxt->input;
8841 if (in == NULL) return(-1);
8842 base = in->cur - in->base;
8843 if (base < 0) return(-1);
8844 if (ctxt->checkIndex > base)
8845 base = ctxt->checkIndex;
8846 if (in->buf == NULL) {
8847 buf = in->base;
8848 len = in->length;
8849 } else {
8850 buf = in->buf->buffer->content;
8851 len = in->buf->buffer->use;
8852 }
8853 /* take into account the sequence length */
8854 if (third) len -= 2;
8855 else if (next) len --;
8856 for (;base < len;base++) {
8857 if (buf[base] == first) {
8858 if (third != 0) {
8859 if ((buf[base + 1] != next) ||
8860 (buf[base + 2] != third)) continue;
8861 } else if (next != 0) {
8862 if (buf[base + 1] != next) continue;
8863 }
8864 ctxt->checkIndex = 0;
8865#ifdef DEBUG_PUSH
8866 if (next == 0)
8867 xmlGenericError(xmlGenericErrorContext,
8868 "PP: lookup '%c' found at %d\n",
8869 first, base);
8870 else if (third == 0)
8871 xmlGenericError(xmlGenericErrorContext,
8872 "PP: lookup '%c%c' found at %d\n",
8873 first, next, base);
8874 else
8875 xmlGenericError(xmlGenericErrorContext,
8876 "PP: lookup '%c%c%c' found at %d\n",
8877 first, next, third, base);
8878#endif
8879 return(base - (in->cur - in->base));
8880 }
8881 }
8882 ctxt->checkIndex = base;
8883#ifdef DEBUG_PUSH
8884 if (next == 0)
8885 xmlGenericError(xmlGenericErrorContext,
8886 "PP: lookup '%c' failed\n", first);
8887 else if (third == 0)
8888 xmlGenericError(xmlGenericErrorContext,
8889 "PP: lookup '%c%c' failed\n", first, next);
8890 else
8891 xmlGenericError(xmlGenericErrorContext,
8892 "PP: lookup '%c%c%c' failed\n", first, next, third);
8893#endif
8894 return(-1);
8895}
8896
8897/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008898 * xmlParseGetLasts:
8899 * @ctxt: an XML parser context
8900 * @lastlt: pointer to store the last '<' from the input
8901 * @lastgt: pointer to store the last '>' from the input
8902 *
8903 * Lookup the last < and > in the current chunk
8904 */
8905static void
8906xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8907 const xmlChar **lastgt) {
8908 const xmlChar *tmp;
8909
8910 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8911 xmlGenericError(xmlGenericErrorContext,
8912 "Internal error: xmlParseGetLasts\n");
8913 return;
8914 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008915 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008916 tmp = ctxt->input->end;
8917 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008918 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008919 if (tmp < ctxt->input->base) {
8920 *lastlt = NULL;
8921 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008922 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008923 *lastlt = tmp;
8924 tmp++;
8925 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8926 if (*tmp == '\'') {
8927 tmp++;
8928 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8929 if (tmp < ctxt->input->end) tmp++;
8930 } else if (*tmp == '"') {
8931 tmp++;
8932 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8933 if (tmp < ctxt->input->end) tmp++;
8934 } else
8935 tmp++;
8936 }
8937 if (tmp < ctxt->input->end)
8938 *lastgt = tmp;
8939 else {
8940 tmp = *lastlt;
8941 tmp--;
8942 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8943 if (tmp >= ctxt->input->base)
8944 *lastgt = tmp;
8945 else
8946 *lastgt = NULL;
8947 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008948 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008949 } else {
8950 *lastlt = NULL;
8951 *lastgt = NULL;
8952 }
8953}
8954/**
Owen Taylor3473f882001-02-23 17:55:21 +00008955 * xmlParseTryOrFinish:
8956 * @ctxt: an XML parser context
8957 * @terminate: last chunk indicator
8958 *
8959 * Try to progress on parsing
8960 *
8961 * Returns zero if no parsing was possible
8962 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008963static int
Owen Taylor3473f882001-02-23 17:55:21 +00008964xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8965 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008966 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008967 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008968 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008969
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008970 if (ctxt->input == NULL)
8971 return(0);
8972
Owen Taylor3473f882001-02-23 17:55:21 +00008973#ifdef DEBUG_PUSH
8974 switch (ctxt->instate) {
8975 case XML_PARSER_EOF:
8976 xmlGenericError(xmlGenericErrorContext,
8977 "PP: try EOF\n"); break;
8978 case XML_PARSER_START:
8979 xmlGenericError(xmlGenericErrorContext,
8980 "PP: try START\n"); break;
8981 case XML_PARSER_MISC:
8982 xmlGenericError(xmlGenericErrorContext,
8983 "PP: try MISC\n");break;
8984 case XML_PARSER_COMMENT:
8985 xmlGenericError(xmlGenericErrorContext,
8986 "PP: try COMMENT\n");break;
8987 case XML_PARSER_PROLOG:
8988 xmlGenericError(xmlGenericErrorContext,
8989 "PP: try PROLOG\n");break;
8990 case XML_PARSER_START_TAG:
8991 xmlGenericError(xmlGenericErrorContext,
8992 "PP: try START_TAG\n");break;
8993 case XML_PARSER_CONTENT:
8994 xmlGenericError(xmlGenericErrorContext,
8995 "PP: try CONTENT\n");break;
8996 case XML_PARSER_CDATA_SECTION:
8997 xmlGenericError(xmlGenericErrorContext,
8998 "PP: try CDATA_SECTION\n");break;
8999 case XML_PARSER_END_TAG:
9000 xmlGenericError(xmlGenericErrorContext,
9001 "PP: try END_TAG\n");break;
9002 case XML_PARSER_ENTITY_DECL:
9003 xmlGenericError(xmlGenericErrorContext,
9004 "PP: try ENTITY_DECL\n");break;
9005 case XML_PARSER_ENTITY_VALUE:
9006 xmlGenericError(xmlGenericErrorContext,
9007 "PP: try ENTITY_VALUE\n");break;
9008 case XML_PARSER_ATTRIBUTE_VALUE:
9009 xmlGenericError(xmlGenericErrorContext,
9010 "PP: try ATTRIBUTE_VALUE\n");break;
9011 case XML_PARSER_DTD:
9012 xmlGenericError(xmlGenericErrorContext,
9013 "PP: try DTD\n");break;
9014 case XML_PARSER_EPILOG:
9015 xmlGenericError(xmlGenericErrorContext,
9016 "PP: try EPILOG\n");break;
9017 case XML_PARSER_PI:
9018 xmlGenericError(xmlGenericErrorContext,
9019 "PP: try PI\n");break;
9020 case XML_PARSER_IGNORE:
9021 xmlGenericError(xmlGenericErrorContext,
9022 "PP: try IGNORE\n");break;
9023 }
9024#endif
9025
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009026 if ((ctxt->input != NULL) &&
9027 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009028 xmlSHRINK(ctxt);
9029 ctxt->checkIndex = 0;
9030 }
9031 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009032
Daniel Veillarda880b122003-04-21 21:36:41 +00009033 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009034 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9035 return(0);
9036
9037
Owen Taylor3473f882001-02-23 17:55:21 +00009038 /*
9039 * Pop-up of finished entities.
9040 */
9041 while ((RAW == 0) && (ctxt->inputNr > 1))
9042 xmlPopInput(ctxt);
9043
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009044 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009045 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009046 avail = ctxt->input->length -
9047 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009048 else {
9049 /*
9050 * If we are operating on converted input, try to flush
9051 * remainng chars to avoid them stalling in the non-converted
9052 * buffer.
9053 */
9054 if ((ctxt->input->buf->raw != NULL) &&
9055 (ctxt->input->buf->raw->use > 0)) {
9056 int base = ctxt->input->base -
9057 ctxt->input->buf->buffer->content;
9058 int current = ctxt->input->cur - ctxt->input->base;
9059
9060 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9061 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9062 ctxt->input->cur = ctxt->input->base + current;
9063 ctxt->input->end =
9064 &ctxt->input->buf->buffer->content[
9065 ctxt->input->buf->buffer->use];
9066 }
9067 avail = ctxt->input->buf->buffer->use -
9068 (ctxt->input->cur - ctxt->input->base);
9069 }
Owen Taylor3473f882001-02-23 17:55:21 +00009070 if (avail < 1)
9071 goto done;
9072 switch (ctxt->instate) {
9073 case XML_PARSER_EOF:
9074 /*
9075 * Document parsing is done !
9076 */
9077 goto done;
9078 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009079 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9080 xmlChar start[4];
9081 xmlCharEncoding enc;
9082
9083 /*
9084 * Very first chars read from the document flow.
9085 */
9086 if (avail < 4)
9087 goto done;
9088
9089 /*
9090 * Get the 4 first bytes and decode the charset
9091 * if enc != XML_CHAR_ENCODING_NONE
9092 * plug some encoding conversion routines.
9093 */
9094 start[0] = RAW;
9095 start[1] = NXT(1);
9096 start[2] = NXT(2);
9097 start[3] = NXT(3);
9098 enc = xmlDetectCharEncoding(start, 4);
9099 if (enc != XML_CHAR_ENCODING_NONE) {
9100 xmlSwitchEncoding(ctxt, enc);
9101 }
9102 break;
9103 }
Owen Taylor3473f882001-02-23 17:55:21 +00009104
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009105 if (avail < 2)
9106 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009107 cur = ctxt->input->cur[0];
9108 next = ctxt->input->cur[1];
9109 if (cur == 0) {
9110 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9111 ctxt->sax->setDocumentLocator(ctxt->userData,
9112 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009113 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009114 ctxt->instate = XML_PARSER_EOF;
9115#ifdef DEBUG_PUSH
9116 xmlGenericError(xmlGenericErrorContext,
9117 "PP: entering EOF\n");
9118#endif
9119 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9120 ctxt->sax->endDocument(ctxt->userData);
9121 goto done;
9122 }
9123 if ((cur == '<') && (next == '?')) {
9124 /* PI or XML decl */
9125 if (avail < 5) return(ret);
9126 if ((!terminate) &&
9127 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9128 return(ret);
9129 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9130 ctxt->sax->setDocumentLocator(ctxt->userData,
9131 &xmlDefaultSAXLocator);
9132 if ((ctxt->input->cur[2] == 'x') &&
9133 (ctxt->input->cur[3] == 'm') &&
9134 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009135 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009136 ret += 5;
9137#ifdef DEBUG_PUSH
9138 xmlGenericError(xmlGenericErrorContext,
9139 "PP: Parsing XML Decl\n");
9140#endif
9141 xmlParseXMLDecl(ctxt);
9142 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9143 /*
9144 * The XML REC instructs us to stop parsing right
9145 * here
9146 */
9147 ctxt->instate = XML_PARSER_EOF;
9148 return(0);
9149 }
9150 ctxt->standalone = ctxt->input->standalone;
9151 if ((ctxt->encoding == NULL) &&
9152 (ctxt->input->encoding != NULL))
9153 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9154 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9155 (!ctxt->disableSAX))
9156 ctxt->sax->startDocument(ctxt->userData);
9157 ctxt->instate = XML_PARSER_MISC;
9158#ifdef DEBUG_PUSH
9159 xmlGenericError(xmlGenericErrorContext,
9160 "PP: entering MISC\n");
9161#endif
9162 } else {
9163 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9164 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9165 (!ctxt->disableSAX))
9166 ctxt->sax->startDocument(ctxt->userData);
9167 ctxt->instate = XML_PARSER_MISC;
9168#ifdef DEBUG_PUSH
9169 xmlGenericError(xmlGenericErrorContext,
9170 "PP: entering MISC\n");
9171#endif
9172 }
9173 } else {
9174 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9175 ctxt->sax->setDocumentLocator(ctxt->userData,
9176 &xmlDefaultSAXLocator);
9177 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009178 if (ctxt->version == NULL) {
9179 xmlErrMemory(ctxt, NULL);
9180 break;
9181 }
Owen Taylor3473f882001-02-23 17:55:21 +00009182 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9183 (!ctxt->disableSAX))
9184 ctxt->sax->startDocument(ctxt->userData);
9185 ctxt->instate = XML_PARSER_MISC;
9186#ifdef DEBUG_PUSH
9187 xmlGenericError(xmlGenericErrorContext,
9188 "PP: entering MISC\n");
9189#endif
9190 }
9191 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009192 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009193 const xmlChar *name;
9194 const xmlChar *prefix;
9195 const xmlChar *URI;
9196 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009197
9198 if ((avail < 2) && (ctxt->inputNr == 1))
9199 goto done;
9200 cur = ctxt->input->cur[0];
9201 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009202 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009203 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009204 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9205 ctxt->sax->endDocument(ctxt->userData);
9206 goto done;
9207 }
9208 if (!terminate) {
9209 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009210 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009211 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009212 goto done;
9213 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9214 goto done;
9215 }
9216 }
9217 if (ctxt->spaceNr == 0)
9218 spacePush(ctxt, -1);
9219 else
9220 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009221#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009222 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009223#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009224 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009225#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009226 else
9227 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009228#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 if (name == NULL) {
9230 spacePop(ctxt);
9231 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009232 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9233 ctxt->sax->endDocument(ctxt->userData);
9234 goto done;
9235 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009236#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009237 /*
9238 * [ VC: Root Element Type ]
9239 * The Name in the document type declaration must match
9240 * the element type of the root element.
9241 */
9242 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9243 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9244 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009245#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009246
9247 /*
9248 * Check for an Empty Element.
9249 */
9250 if ((RAW == '/') && (NXT(1) == '>')) {
9251 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252
9253 if (ctxt->sax2) {
9254 if ((ctxt->sax != NULL) &&
9255 (ctxt->sax->endElementNs != NULL) &&
9256 (!ctxt->disableSAX))
9257 ctxt->sax->endElementNs(ctxt->userData, name,
9258 prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009259#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009260 } else {
9261 if ((ctxt->sax != NULL) &&
9262 (ctxt->sax->endElement != NULL) &&
9263 (!ctxt->disableSAX))
9264 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009265#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009266 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009267 spacePop(ctxt);
9268 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009269 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009270 } else {
9271 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009272 }
9273 break;
9274 }
9275 if (RAW == '>') {
9276 NEXT;
9277 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009278 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009279 "Couldn't find end of Start Tag %s\n",
9280 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009281 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009282 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009283 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009284 if (ctxt->sax2)
9285 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009286#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009287 else
9288 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009289#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009290
Daniel Veillarda880b122003-04-21 21:36:41 +00009291 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009292 break;
9293 }
9294 case XML_PARSER_CONTENT: {
9295 const xmlChar *test;
9296 unsigned int cons;
9297 if ((avail < 2) && (ctxt->inputNr == 1))
9298 goto done;
9299 cur = ctxt->input->cur[0];
9300 next = ctxt->input->cur[1];
9301
9302 test = CUR_PTR;
9303 cons = ctxt->input->consumed;
9304 if ((cur == '<') && (next == '/')) {
9305 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009306 break;
9307 } else if ((cur == '<') && (next == '?')) {
9308 if ((!terminate) &&
9309 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9310 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009311 xmlParsePI(ctxt);
9312 } else if ((cur == '<') && (next != '!')) {
9313 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009314 break;
9315 } else if ((cur == '<') && (next == '!') &&
9316 (ctxt->input->cur[2] == '-') &&
9317 (ctxt->input->cur[3] == '-')) {
9318 if ((!terminate) &&
9319 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9320 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009321 xmlParseComment(ctxt);
9322 ctxt->instate = XML_PARSER_CONTENT;
9323 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9324 (ctxt->input->cur[2] == '[') &&
9325 (ctxt->input->cur[3] == 'C') &&
9326 (ctxt->input->cur[4] == 'D') &&
9327 (ctxt->input->cur[5] == 'A') &&
9328 (ctxt->input->cur[6] == 'T') &&
9329 (ctxt->input->cur[7] == 'A') &&
9330 (ctxt->input->cur[8] == '[')) {
9331 SKIP(9);
9332 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009333 break;
9334 } else if ((cur == '<') && (next == '!') &&
9335 (avail < 9)) {
9336 goto done;
9337 } else if (cur == '&') {
9338 if ((!terminate) &&
9339 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9340 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009341 xmlParseReference(ctxt);
9342 } else {
9343 /* TODO Avoid the extra copy, handle directly !!! */
9344 /*
9345 * Goal of the following test is:
9346 * - minimize calls to the SAX 'character' callback
9347 * when they are mergeable
9348 * - handle an problem for isBlank when we only parse
9349 * a sequence of blank chars and the next one is
9350 * not available to check against '<' presence.
9351 * - tries to homogenize the differences in SAX
9352 * callbacks between the push and pull versions
9353 * of the parser.
9354 */
9355 if ((ctxt->inputNr == 1) &&
9356 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9357 if (!terminate) {
9358 if (ctxt->progressive) {
9359 if ((lastlt == NULL) ||
9360 (ctxt->input->cur > lastlt))
9361 goto done;
9362 } else if (xmlParseLookupSequence(ctxt,
9363 '<', 0, 0) < 0) {
9364 goto done;
9365 }
9366 }
9367 }
9368 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009369 xmlParseCharData(ctxt, 0);
9370 }
9371 /*
9372 * Pop-up of finished entities.
9373 */
9374 while ((RAW == 0) && (ctxt->inputNr > 1))
9375 xmlPopInput(ctxt);
9376 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009377 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9378 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009379 ctxt->instate = XML_PARSER_EOF;
9380 break;
9381 }
9382 break;
9383 }
9384 case XML_PARSER_END_TAG:
9385 if (avail < 2)
9386 goto done;
9387 if (!terminate) {
9388 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009389 /* > can be found unescaped in attribute values */
9390 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009391 goto done;
9392 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9393 goto done;
9394 }
9395 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009396 if (ctxt->sax2) {
9397 xmlParseEndTag2(ctxt,
9398 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9399 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009400 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009401 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009402 }
9403#ifdef LIBXML_SAX1_ENABLED
9404 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009405 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009406#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009407 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009408 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009409 } else {
9410 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009411 }
9412 break;
9413 case XML_PARSER_CDATA_SECTION: {
9414 /*
9415 * The Push mode need to have the SAX callback for
9416 * cdataBlock merge back contiguous callbacks.
9417 */
9418 int base;
9419
9420 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9421 if (base < 0) {
9422 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9423 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9424 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009425 ctxt->sax->cdataBlock(ctxt->userData,
9426 ctxt->input->cur,
9427 XML_PARSER_BIG_BUFFER_SIZE);
9428 else if (ctxt->sax->characters != NULL)
9429 ctxt->sax->characters(ctxt->userData,
9430 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009431 XML_PARSER_BIG_BUFFER_SIZE);
9432 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009433 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009434 ctxt->checkIndex = 0;
9435 }
9436 goto done;
9437 } else {
9438 if ((ctxt->sax != NULL) && (base > 0) &&
9439 (!ctxt->disableSAX)) {
9440 if (ctxt->sax->cdataBlock != NULL)
9441 ctxt->sax->cdataBlock(ctxt->userData,
9442 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009443 else if (ctxt->sax->characters != NULL)
9444 ctxt->sax->characters(ctxt->userData,
9445 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009446 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009447 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009448 ctxt->checkIndex = 0;
9449 ctxt->instate = XML_PARSER_CONTENT;
9450#ifdef DEBUG_PUSH
9451 xmlGenericError(xmlGenericErrorContext,
9452 "PP: entering CONTENT\n");
9453#endif
9454 }
9455 break;
9456 }
Owen Taylor3473f882001-02-23 17:55:21 +00009457 case XML_PARSER_MISC:
9458 SKIP_BLANKS;
9459 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009460 avail = ctxt->input->length -
9461 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009462 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009463 avail = ctxt->input->buf->buffer->use -
9464 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009465 if (avail < 2)
9466 goto done;
9467 cur = ctxt->input->cur[0];
9468 next = ctxt->input->cur[1];
9469 if ((cur == '<') && (next == '?')) {
9470 if ((!terminate) &&
9471 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9472 goto done;
9473#ifdef DEBUG_PUSH
9474 xmlGenericError(xmlGenericErrorContext,
9475 "PP: Parsing PI\n");
9476#endif
9477 xmlParsePI(ctxt);
9478 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009479 (ctxt->input->cur[2] == '-') &&
9480 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009481 if ((!terminate) &&
9482 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9483 goto done;
9484#ifdef DEBUG_PUSH
9485 xmlGenericError(xmlGenericErrorContext,
9486 "PP: Parsing Comment\n");
9487#endif
9488 xmlParseComment(ctxt);
9489 ctxt->instate = XML_PARSER_MISC;
9490 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009491 (ctxt->input->cur[2] == 'D') &&
9492 (ctxt->input->cur[3] == 'O') &&
9493 (ctxt->input->cur[4] == 'C') &&
9494 (ctxt->input->cur[5] == 'T') &&
9495 (ctxt->input->cur[6] == 'Y') &&
9496 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009497 (ctxt->input->cur[8] == 'E')) {
9498 if ((!terminate) &&
9499 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9500 goto done;
9501#ifdef DEBUG_PUSH
9502 xmlGenericError(xmlGenericErrorContext,
9503 "PP: Parsing internal subset\n");
9504#endif
9505 ctxt->inSubset = 1;
9506 xmlParseDocTypeDecl(ctxt);
9507 if (RAW == '[') {
9508 ctxt->instate = XML_PARSER_DTD;
9509#ifdef DEBUG_PUSH
9510 xmlGenericError(xmlGenericErrorContext,
9511 "PP: entering DTD\n");
9512#endif
9513 } else {
9514 /*
9515 * Create and update the external subset.
9516 */
9517 ctxt->inSubset = 2;
9518 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9519 (ctxt->sax->externalSubset != NULL))
9520 ctxt->sax->externalSubset(ctxt->userData,
9521 ctxt->intSubName, ctxt->extSubSystem,
9522 ctxt->extSubURI);
9523 ctxt->inSubset = 0;
9524 ctxt->instate = XML_PARSER_PROLOG;
9525#ifdef DEBUG_PUSH
9526 xmlGenericError(xmlGenericErrorContext,
9527 "PP: entering PROLOG\n");
9528#endif
9529 }
9530 } else if ((cur == '<') && (next == '!') &&
9531 (avail < 9)) {
9532 goto done;
9533 } else {
9534 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009535 ctxt->progressive = 1;
9536 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009537#ifdef DEBUG_PUSH
9538 xmlGenericError(xmlGenericErrorContext,
9539 "PP: entering START_TAG\n");
9540#endif
9541 }
9542 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009543 case XML_PARSER_PROLOG:
9544 SKIP_BLANKS;
9545 if (ctxt->input->buf == NULL)
9546 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9547 else
9548 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9549 if (avail < 2)
9550 goto done;
9551 cur = ctxt->input->cur[0];
9552 next = ctxt->input->cur[1];
9553 if ((cur == '<') && (next == '?')) {
9554 if ((!terminate) &&
9555 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9556 goto done;
9557#ifdef DEBUG_PUSH
9558 xmlGenericError(xmlGenericErrorContext,
9559 "PP: Parsing PI\n");
9560#endif
9561 xmlParsePI(ctxt);
9562 } else if ((cur == '<') && (next == '!') &&
9563 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9564 if ((!terminate) &&
9565 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9566 goto done;
9567#ifdef DEBUG_PUSH
9568 xmlGenericError(xmlGenericErrorContext,
9569 "PP: Parsing Comment\n");
9570#endif
9571 xmlParseComment(ctxt);
9572 ctxt->instate = XML_PARSER_PROLOG;
9573 } else if ((cur == '<') && (next == '!') &&
9574 (avail < 4)) {
9575 goto done;
9576 } else {
9577 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009578 if (ctxt->progressive == 0)
9579 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009580 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009581#ifdef DEBUG_PUSH
9582 xmlGenericError(xmlGenericErrorContext,
9583 "PP: entering START_TAG\n");
9584#endif
9585 }
9586 break;
9587 case XML_PARSER_EPILOG:
9588 SKIP_BLANKS;
9589 if (ctxt->input->buf == NULL)
9590 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9591 else
9592 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9593 if (avail < 2)
9594 goto done;
9595 cur = ctxt->input->cur[0];
9596 next = ctxt->input->cur[1];
9597 if ((cur == '<') && (next == '?')) {
9598 if ((!terminate) &&
9599 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9600 goto done;
9601#ifdef DEBUG_PUSH
9602 xmlGenericError(xmlGenericErrorContext,
9603 "PP: Parsing PI\n");
9604#endif
9605 xmlParsePI(ctxt);
9606 ctxt->instate = XML_PARSER_EPILOG;
9607 } else if ((cur == '<') && (next == '!') &&
9608 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9609 if ((!terminate) &&
9610 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9611 goto done;
9612#ifdef DEBUG_PUSH
9613 xmlGenericError(xmlGenericErrorContext,
9614 "PP: Parsing Comment\n");
9615#endif
9616 xmlParseComment(ctxt);
9617 ctxt->instate = XML_PARSER_EPILOG;
9618 } else if ((cur == '<') && (next == '!') &&
9619 (avail < 4)) {
9620 goto done;
9621 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009623 ctxt->instate = XML_PARSER_EOF;
9624#ifdef DEBUG_PUSH
9625 xmlGenericError(xmlGenericErrorContext,
9626 "PP: entering EOF\n");
9627#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009628 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009629 ctxt->sax->endDocument(ctxt->userData);
9630 goto done;
9631 }
9632 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009633 case XML_PARSER_DTD: {
9634 /*
9635 * Sorry but progressive parsing of the internal subset
9636 * is not expected to be supported. We first check that
9637 * the full content of the internal subset is available and
9638 * the parsing is launched only at that point.
9639 * Internal subset ends up with "']' S? '>'" in an unescaped
9640 * section and not in a ']]>' sequence which are conditional
9641 * sections (whoever argued to keep that crap in XML deserve
9642 * a place in hell !).
9643 */
9644 int base, i;
9645 xmlChar *buf;
9646 xmlChar quote = 0;
9647
9648 base = ctxt->input->cur - ctxt->input->base;
9649 if (base < 0) return(0);
9650 if (ctxt->checkIndex > base)
9651 base = ctxt->checkIndex;
9652 buf = ctxt->input->buf->buffer->content;
9653 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9654 base++) {
9655 if (quote != 0) {
9656 if (buf[base] == quote)
9657 quote = 0;
9658 continue;
9659 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009660 if ((quote == 0) && (buf[base] == '<')) {
9661 int found = 0;
9662 /* special handling of comments */
9663 if (((unsigned int) base + 4 <
9664 ctxt->input->buf->buffer->use) &&
9665 (buf[base + 1] == '!') &&
9666 (buf[base + 2] == '-') &&
9667 (buf[base + 3] == '-')) {
9668 for (;(unsigned int) base + 3 <
9669 ctxt->input->buf->buffer->use; base++) {
9670 if ((buf[base] == '-') &&
9671 (buf[base + 1] == '-') &&
9672 (buf[base + 2] == '>')) {
9673 found = 1;
9674 base += 2;
9675 break;
9676 }
9677 }
9678 if (!found)
9679 break;
9680 continue;
9681 }
9682 }
Owen Taylor3473f882001-02-23 17:55:21 +00009683 if (buf[base] == '"') {
9684 quote = '"';
9685 continue;
9686 }
9687 if (buf[base] == '\'') {
9688 quote = '\'';
9689 continue;
9690 }
9691 if (buf[base] == ']') {
9692 if ((unsigned int) base +1 >=
9693 ctxt->input->buf->buffer->use)
9694 break;
9695 if (buf[base + 1] == ']') {
9696 /* conditional crap, skip both ']' ! */
9697 base++;
9698 continue;
9699 }
9700 for (i = 0;
9701 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9702 i++) {
9703 if (buf[base + i] == '>')
9704 goto found_end_int_subset;
9705 }
9706 break;
9707 }
9708 }
9709 /*
9710 * We didn't found the end of the Internal subset
9711 */
9712 if (quote == 0)
9713 ctxt->checkIndex = base;
9714#ifdef DEBUG_PUSH
9715 if (next == 0)
9716 xmlGenericError(xmlGenericErrorContext,
9717 "PP: lookup of int subset end filed\n");
9718#endif
9719 goto done;
9720
9721found_end_int_subset:
9722 xmlParseInternalSubset(ctxt);
9723 ctxt->inSubset = 2;
9724 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9725 (ctxt->sax->externalSubset != NULL))
9726 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9727 ctxt->extSubSystem, ctxt->extSubURI);
9728 ctxt->inSubset = 0;
9729 ctxt->instate = XML_PARSER_PROLOG;
9730 ctxt->checkIndex = 0;
9731#ifdef DEBUG_PUSH
9732 xmlGenericError(xmlGenericErrorContext,
9733 "PP: entering PROLOG\n");
9734#endif
9735 break;
9736 }
9737 case XML_PARSER_COMMENT:
9738 xmlGenericError(xmlGenericErrorContext,
9739 "PP: internal error, state == COMMENT\n");
9740 ctxt->instate = XML_PARSER_CONTENT;
9741#ifdef DEBUG_PUSH
9742 xmlGenericError(xmlGenericErrorContext,
9743 "PP: entering CONTENT\n");
9744#endif
9745 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009746 case XML_PARSER_IGNORE:
9747 xmlGenericError(xmlGenericErrorContext,
9748 "PP: internal error, state == IGNORE");
9749 ctxt->instate = XML_PARSER_DTD;
9750#ifdef DEBUG_PUSH
9751 xmlGenericError(xmlGenericErrorContext,
9752 "PP: entering DTD\n");
9753#endif
9754 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009755 case XML_PARSER_PI:
9756 xmlGenericError(xmlGenericErrorContext,
9757 "PP: internal error, state == PI\n");
9758 ctxt->instate = XML_PARSER_CONTENT;
9759#ifdef DEBUG_PUSH
9760 xmlGenericError(xmlGenericErrorContext,
9761 "PP: entering CONTENT\n");
9762#endif
9763 break;
9764 case XML_PARSER_ENTITY_DECL:
9765 xmlGenericError(xmlGenericErrorContext,
9766 "PP: internal error, state == ENTITY_DECL\n");
9767 ctxt->instate = XML_PARSER_DTD;
9768#ifdef DEBUG_PUSH
9769 xmlGenericError(xmlGenericErrorContext,
9770 "PP: entering DTD\n");
9771#endif
9772 break;
9773 case XML_PARSER_ENTITY_VALUE:
9774 xmlGenericError(xmlGenericErrorContext,
9775 "PP: internal error, state == ENTITY_VALUE\n");
9776 ctxt->instate = XML_PARSER_CONTENT;
9777#ifdef DEBUG_PUSH
9778 xmlGenericError(xmlGenericErrorContext,
9779 "PP: entering DTD\n");
9780#endif
9781 break;
9782 case XML_PARSER_ATTRIBUTE_VALUE:
9783 xmlGenericError(xmlGenericErrorContext,
9784 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9785 ctxt->instate = XML_PARSER_START_TAG;
9786#ifdef DEBUG_PUSH
9787 xmlGenericError(xmlGenericErrorContext,
9788 "PP: entering START_TAG\n");
9789#endif
9790 break;
9791 case XML_PARSER_SYSTEM_LITERAL:
9792 xmlGenericError(xmlGenericErrorContext,
9793 "PP: internal error, state == SYSTEM_LITERAL\n");
9794 ctxt->instate = XML_PARSER_START_TAG;
9795#ifdef DEBUG_PUSH
9796 xmlGenericError(xmlGenericErrorContext,
9797 "PP: entering START_TAG\n");
9798#endif
9799 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009800 case XML_PARSER_PUBLIC_LITERAL:
9801 xmlGenericError(xmlGenericErrorContext,
9802 "PP: internal error, state == PUBLIC_LITERAL\n");
9803 ctxt->instate = XML_PARSER_START_TAG;
9804#ifdef DEBUG_PUSH
9805 xmlGenericError(xmlGenericErrorContext,
9806 "PP: entering START_TAG\n");
9807#endif
9808 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009809 }
9810 }
9811done:
9812#ifdef DEBUG_PUSH
9813 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9814#endif
9815 return(ret);
9816}
9817
9818/**
Owen Taylor3473f882001-02-23 17:55:21 +00009819 * xmlParseChunk:
9820 * @ctxt: an XML parser context
9821 * @chunk: an char array
9822 * @size: the size in byte of the chunk
9823 * @terminate: last chunk indicator
9824 *
9825 * Parse a Chunk of memory
9826 *
9827 * Returns zero if no error, the xmlParserErrors otherwise.
9828 */
9829int
9830xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9831 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009832 if (ctxt == NULL)
9833 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009834 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9835 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009836 if (ctxt->instate == XML_PARSER_START)
9837 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009838 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9839 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9840 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9841 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009842 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009843
William M. Bracka3215c72004-07-31 16:24:01 +00009844 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9845 if (res < 0) {
9846 ctxt->errNo = XML_PARSER_EOF;
9847 ctxt->disableSAX = 1;
9848 return (XML_PARSER_EOF);
9849 }
Owen Taylor3473f882001-02-23 17:55:21 +00009850 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9851 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009852 ctxt->input->end =
9853 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009854#ifdef DEBUG_PUSH
9855 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9856#endif
9857
Owen Taylor3473f882001-02-23 17:55:21 +00009858 } else if (ctxt->instate != XML_PARSER_EOF) {
9859 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9860 xmlParserInputBufferPtr in = ctxt->input->buf;
9861 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9862 (in->raw != NULL)) {
9863 int nbchars;
9864
9865 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9866 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009867 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009868 xmlGenericError(xmlGenericErrorContext,
9869 "xmlParseChunk: encoder error\n");
9870 return(XML_ERR_INVALID_ENCODING);
9871 }
9872 }
9873 }
9874 }
9875 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009876 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9877 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009878 if (terminate) {
9879 /*
9880 * Check for termination
9881 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009882 int avail = 0;
9883
9884 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009885 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009886 avail = ctxt->input->length -
9887 (ctxt->input->cur - ctxt->input->base);
9888 else
9889 avail = ctxt->input->buf->buffer->use -
9890 (ctxt->input->cur - ctxt->input->base);
9891 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009892
Owen Taylor3473f882001-02-23 17:55:21 +00009893 if ((ctxt->instate != XML_PARSER_EOF) &&
9894 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009895 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009896 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009897 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009898 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009899 }
Owen Taylor3473f882001-02-23 17:55:21 +00009900 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009901 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009902 ctxt->sax->endDocument(ctxt->userData);
9903 }
9904 ctxt->instate = XML_PARSER_EOF;
9905 }
9906 return((xmlParserErrors) ctxt->errNo);
9907}
9908
9909/************************************************************************
9910 * *
9911 * I/O front end functions to the parser *
9912 * *
9913 ************************************************************************/
9914
9915/**
9916 * xmlStopParser:
9917 * @ctxt: an XML parser context
9918 *
9919 * Blocks further parser processing
9920 */
9921void
9922xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009923 if (ctxt == NULL)
9924 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009925 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009926 ctxt->disableSAX = 1;
William M. Brack230c5502004-12-20 16:18:49 +00009927 if (ctxt->input != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009928 ctxt->input->cur = BAD_CAST"";
William M. Brack230c5502004-12-20 16:18:49 +00009929 ctxt->input->base = ctxt->input->cur;
9930 }
Owen Taylor3473f882001-02-23 17:55:21 +00009931}
9932
9933/**
9934 * xmlCreatePushParserCtxt:
9935 * @sax: a SAX handler
9936 * @user_data: The user data returned on SAX callbacks
9937 * @chunk: a pointer to an array of chars
9938 * @size: number of chars in the array
9939 * @filename: an optional file name or URI
9940 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009941 * Create a parser context for using the XML parser in push mode.
9942 * If @buffer and @size are non-NULL, the data is used to detect
9943 * the encoding. The remaining characters will be parsed so they
9944 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009945 * To allow content encoding detection, @size should be >= 4
9946 * The value of @filename is used for fetching external entities
9947 * and error/warning reports.
9948 *
9949 * Returns the new parser context or NULL
9950 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009951
Owen Taylor3473f882001-02-23 17:55:21 +00009952xmlParserCtxtPtr
9953xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9954 const char *chunk, int size, const char *filename) {
9955 xmlParserCtxtPtr ctxt;
9956 xmlParserInputPtr inputStream;
9957 xmlParserInputBufferPtr buf;
9958 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9959
9960 /*
9961 * plug some encoding conversion routines
9962 */
9963 if ((chunk != NULL) && (size >= 4))
9964 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9965
9966 buf = xmlAllocParserInputBuffer(enc);
9967 if (buf == NULL) return(NULL);
9968
9969 ctxt = xmlNewParserCtxt();
9970 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009971 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009972 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009973 return(NULL);
9974 }
Daniel Veillard03a53c32004-10-26 16:06:51 +00009975 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009976 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9977 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009978 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009979 xmlFreeParserInputBuffer(buf);
9980 xmlFreeParserCtxt(ctxt);
9981 return(NULL);
9982 }
Owen Taylor3473f882001-02-23 17:55:21 +00009983 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009984#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009985 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009986#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009987 xmlFree(ctxt->sax);
9988 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9989 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009990 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009991 xmlFreeParserInputBuffer(buf);
9992 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009993 return(NULL);
9994 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +00009995 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
9996 if (sax->initialized == XML_SAX2_MAGIC)
9997 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
9998 else
9999 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010000 if (user_data != NULL)
10001 ctxt->userData = user_data;
10002 }
10003 if (filename == NULL) {
10004 ctxt->directory = NULL;
10005 } else {
10006 ctxt->directory = xmlParserGetDirectory(filename);
10007 }
10008
10009 inputStream = xmlNewInputStream(ctxt);
10010 if (inputStream == NULL) {
10011 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010012 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010013 return(NULL);
10014 }
10015
10016 if (filename == NULL)
10017 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010018 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010019 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010020 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010021 if (inputStream->filename == NULL) {
10022 xmlFreeParserCtxt(ctxt);
10023 xmlFreeParserInputBuffer(buf);
10024 return(NULL);
10025 }
10026 }
Owen Taylor3473f882001-02-23 17:55:21 +000010027 inputStream->buf = buf;
10028 inputStream->base = inputStream->buf->buffer->content;
10029 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010030 inputStream->end =
10031 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010032
10033 inputPush(ctxt, inputStream);
10034
10035 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10036 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010037 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10038 int cur = ctxt->input->cur - ctxt->input->base;
10039
Owen Taylor3473f882001-02-23 17:55:21 +000010040 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010041
10042 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10043 ctxt->input->cur = ctxt->input->base + cur;
10044 ctxt->input->end =
10045 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010046#ifdef DEBUG_PUSH
10047 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10048#endif
10049 }
10050
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010051 if (enc != XML_CHAR_ENCODING_NONE) {
10052 xmlSwitchEncoding(ctxt, enc);
10053 }
10054
Owen Taylor3473f882001-02-23 17:55:21 +000010055 return(ctxt);
10056}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010057#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010058
10059/**
10060 * xmlCreateIOParserCtxt:
10061 * @sax: a SAX handler
10062 * @user_data: The user data returned on SAX callbacks
10063 * @ioread: an I/O read function
10064 * @ioclose: an I/O close function
10065 * @ioctx: an I/O handler
10066 * @enc: the charset encoding if known
10067 *
10068 * Create a parser context for using the XML parser with an existing
10069 * I/O stream
10070 *
10071 * Returns the new parser context or NULL
10072 */
10073xmlParserCtxtPtr
10074xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10075 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10076 void *ioctx, xmlCharEncoding enc) {
10077 xmlParserCtxtPtr ctxt;
10078 xmlParserInputPtr inputStream;
10079 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010080
10081 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010082
10083 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10084 if (buf == NULL) return(NULL);
10085
10086 ctxt = xmlNewParserCtxt();
10087 if (ctxt == NULL) {
10088 xmlFree(buf);
10089 return(NULL);
10090 }
10091 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010092#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010093 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010094#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010095 xmlFree(ctxt->sax);
10096 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10097 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010098 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010099 xmlFree(ctxt);
10100 return(NULL);
10101 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010102 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10103 if (sax->initialized == XML_SAX2_MAGIC)
10104 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10105 else
10106 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010107 if (user_data != NULL)
10108 ctxt->userData = user_data;
10109 }
10110
10111 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10112 if (inputStream == NULL) {
10113 xmlFreeParserCtxt(ctxt);
10114 return(NULL);
10115 }
10116 inputPush(ctxt, inputStream);
10117
10118 return(ctxt);
10119}
10120
Daniel Veillard4432df22003-09-28 18:58:27 +000010121#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010122/************************************************************************
10123 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010124 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010125 * *
10126 ************************************************************************/
10127
10128/**
10129 * xmlIOParseDTD:
10130 * @sax: the SAX handler block or NULL
10131 * @input: an Input Buffer
10132 * @enc: the charset encoding if known
10133 *
10134 * Load and parse a DTD
10135 *
10136 * Returns the resulting xmlDtdPtr or NULL in case of error.
10137 * @input will be freed at parsing end.
10138 */
10139
10140xmlDtdPtr
10141xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10142 xmlCharEncoding enc) {
10143 xmlDtdPtr ret = NULL;
10144 xmlParserCtxtPtr ctxt;
10145 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010146 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010147
10148 if (input == NULL)
10149 return(NULL);
10150
10151 ctxt = xmlNewParserCtxt();
10152 if (ctxt == NULL) {
10153 return(NULL);
10154 }
10155
10156 /*
10157 * Set-up the SAX context
10158 */
10159 if (sax != NULL) {
10160 if (ctxt->sax != NULL)
10161 xmlFree(ctxt->sax);
10162 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010163 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010164 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010165 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010166
10167 /*
10168 * generate a parser input from the I/O handler
10169 */
10170
Daniel Veillard43caefb2003-12-07 19:32:22 +000010171 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010172 if (pinput == NULL) {
10173 if (sax != NULL) ctxt->sax = NULL;
10174 xmlFreeParserCtxt(ctxt);
10175 return(NULL);
10176 }
10177
10178 /*
10179 * plug some encoding conversion routines here.
10180 */
10181 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010182 if (enc != XML_CHAR_ENCODING_NONE) {
10183 xmlSwitchEncoding(ctxt, enc);
10184 }
Owen Taylor3473f882001-02-23 17:55:21 +000010185
10186 pinput->filename = NULL;
10187 pinput->line = 1;
10188 pinput->col = 1;
10189 pinput->base = ctxt->input->cur;
10190 pinput->cur = ctxt->input->cur;
10191 pinput->free = NULL;
10192
10193 /*
10194 * let's parse that entity knowing it's an external subset.
10195 */
10196 ctxt->inSubset = 2;
10197 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10198 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10199 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010200
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010201 if ((enc == XML_CHAR_ENCODING_NONE) &&
10202 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010203 /*
10204 * Get the 4 first bytes and decode the charset
10205 * if enc != XML_CHAR_ENCODING_NONE
10206 * plug some encoding conversion routines.
10207 */
10208 start[0] = RAW;
10209 start[1] = NXT(1);
10210 start[2] = NXT(2);
10211 start[3] = NXT(3);
10212 enc = xmlDetectCharEncoding(start, 4);
10213 if (enc != XML_CHAR_ENCODING_NONE) {
10214 xmlSwitchEncoding(ctxt, enc);
10215 }
10216 }
10217
Owen Taylor3473f882001-02-23 17:55:21 +000010218 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10219
10220 if (ctxt->myDoc != NULL) {
10221 if (ctxt->wellFormed) {
10222 ret = ctxt->myDoc->extSubset;
10223 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010224 if (ret != NULL) {
10225 xmlNodePtr tmp;
10226
10227 ret->doc = NULL;
10228 tmp = ret->children;
10229 while (tmp != NULL) {
10230 tmp->doc = NULL;
10231 tmp = tmp->next;
10232 }
10233 }
Owen Taylor3473f882001-02-23 17:55:21 +000010234 } else {
10235 ret = NULL;
10236 }
10237 xmlFreeDoc(ctxt->myDoc);
10238 ctxt->myDoc = NULL;
10239 }
10240 if (sax != NULL) ctxt->sax = NULL;
10241 xmlFreeParserCtxt(ctxt);
10242
10243 return(ret);
10244}
10245
10246/**
10247 * xmlSAXParseDTD:
10248 * @sax: the SAX handler block
10249 * @ExternalID: a NAME* containing the External ID of the DTD
10250 * @SystemID: a NAME* containing the URL to the DTD
10251 *
10252 * Load and parse an external subset.
10253 *
10254 * Returns the resulting xmlDtdPtr or NULL in case of error.
10255 */
10256
10257xmlDtdPtr
10258xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10259 const xmlChar *SystemID) {
10260 xmlDtdPtr ret = NULL;
10261 xmlParserCtxtPtr ctxt;
10262 xmlParserInputPtr input = NULL;
10263 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010264 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010265
10266 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10267
10268 ctxt = xmlNewParserCtxt();
10269 if (ctxt == NULL) {
10270 return(NULL);
10271 }
10272
10273 /*
10274 * Set-up the SAX context
10275 */
10276 if (sax != NULL) {
10277 if (ctxt->sax != NULL)
10278 xmlFree(ctxt->sax);
10279 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010280 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010281 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010282
10283 /*
10284 * Canonicalise the system ID
10285 */
10286 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010287 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010288 xmlFreeParserCtxt(ctxt);
10289 return(NULL);
10290 }
Owen Taylor3473f882001-02-23 17:55:21 +000010291
10292 /*
10293 * Ask the Entity resolver to load the damn thing
10294 */
10295
10296 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010297 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010298 if (input == NULL) {
10299 if (sax != NULL) ctxt->sax = NULL;
10300 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010301 if (systemIdCanonic != NULL)
10302 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010303 return(NULL);
10304 }
10305
10306 /*
10307 * plug some encoding conversion routines here.
10308 */
10309 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010310 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10311 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10312 xmlSwitchEncoding(ctxt, enc);
10313 }
Owen Taylor3473f882001-02-23 17:55:21 +000010314
10315 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010316 input->filename = (char *) systemIdCanonic;
10317 else
10318 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010319 input->line = 1;
10320 input->col = 1;
10321 input->base = ctxt->input->cur;
10322 input->cur = ctxt->input->cur;
10323 input->free = NULL;
10324
10325 /*
10326 * let's parse that entity knowing it's an external subset.
10327 */
10328 ctxt->inSubset = 2;
10329 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10330 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10331 ExternalID, SystemID);
10332 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10333
10334 if (ctxt->myDoc != NULL) {
10335 if (ctxt->wellFormed) {
10336 ret = ctxt->myDoc->extSubset;
10337 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010338 if (ret != NULL) {
10339 xmlNodePtr tmp;
10340
10341 ret->doc = NULL;
10342 tmp = ret->children;
10343 while (tmp != NULL) {
10344 tmp->doc = NULL;
10345 tmp = tmp->next;
10346 }
10347 }
Owen Taylor3473f882001-02-23 17:55:21 +000010348 } else {
10349 ret = NULL;
10350 }
10351 xmlFreeDoc(ctxt->myDoc);
10352 ctxt->myDoc = NULL;
10353 }
10354 if (sax != NULL) ctxt->sax = NULL;
10355 xmlFreeParserCtxt(ctxt);
10356
10357 return(ret);
10358}
10359
Daniel Veillard4432df22003-09-28 18:58:27 +000010360
Owen Taylor3473f882001-02-23 17:55:21 +000010361/**
10362 * xmlParseDTD:
10363 * @ExternalID: a NAME* containing the External ID of the DTD
10364 * @SystemID: a NAME* containing the URL to the DTD
10365 *
10366 * Load and parse an external subset.
10367 *
10368 * Returns the resulting xmlDtdPtr or NULL in case of error.
10369 */
10370
10371xmlDtdPtr
10372xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10373 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10374}
Daniel Veillard4432df22003-09-28 18:58:27 +000010375#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010376
10377/************************************************************************
10378 * *
10379 * Front ends when parsing an Entity *
10380 * *
10381 ************************************************************************/
10382
10383/**
Owen Taylor3473f882001-02-23 17:55:21 +000010384 * xmlParseCtxtExternalEntity:
10385 * @ctx: the existing parsing context
10386 * @URL: the URL for the entity to load
10387 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010388 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010389 *
10390 * Parse an external general entity within an existing parsing context
10391 * An external general parsed entity is well-formed if it matches the
10392 * production labeled extParsedEnt.
10393 *
10394 * [78] extParsedEnt ::= TextDecl? content
10395 *
10396 * Returns 0 if the entity is well formed, -1 in case of args problem and
10397 * the parser error code otherwise
10398 */
10399
10400int
10401xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010402 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010403 xmlParserCtxtPtr ctxt;
10404 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010405 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010406 xmlSAXHandlerPtr oldsax = NULL;
10407 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010408 xmlChar start[4];
10409 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010410
Daniel Veillardce682bc2004-11-05 17:22:25 +000010411 if (ctx == NULL) return(-1);
10412
Owen Taylor3473f882001-02-23 17:55:21 +000010413 if (ctx->depth > 40) {
10414 return(XML_ERR_ENTITY_LOOP);
10415 }
10416
Daniel Veillardcda96922001-08-21 10:56:31 +000010417 if (lst != NULL)
10418 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010419 if ((URL == NULL) && (ID == NULL))
10420 return(-1);
10421 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10422 return(-1);
10423
10424
10425 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10426 if (ctxt == NULL) return(-1);
10427 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010428 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010429 oldsax = ctxt->sax;
10430 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010431 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010432 newDoc = xmlNewDoc(BAD_CAST "1.0");
10433 if (newDoc == NULL) {
10434 xmlFreeParserCtxt(ctxt);
10435 return(-1);
10436 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010437 if (ctx->myDoc->dict) {
10438 newDoc->dict = ctx->myDoc->dict;
10439 xmlDictReference(newDoc->dict);
10440 }
Owen Taylor3473f882001-02-23 17:55:21 +000010441 if (ctx->myDoc != NULL) {
10442 newDoc->intSubset = ctx->myDoc->intSubset;
10443 newDoc->extSubset = ctx->myDoc->extSubset;
10444 }
10445 if (ctx->myDoc->URL != NULL) {
10446 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10447 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010448 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10449 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010450 ctxt->sax = oldsax;
10451 xmlFreeParserCtxt(ctxt);
10452 newDoc->intSubset = NULL;
10453 newDoc->extSubset = NULL;
10454 xmlFreeDoc(newDoc);
10455 return(-1);
10456 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010457 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010458 nodePush(ctxt, newDoc->children);
10459 if (ctx->myDoc == NULL) {
10460 ctxt->myDoc = newDoc;
10461 } else {
10462 ctxt->myDoc = ctx->myDoc;
10463 newDoc->children->doc = ctx->myDoc;
10464 }
10465
Daniel Veillard87a764e2001-06-20 17:41:10 +000010466 /*
10467 * Get the 4 first bytes and decode the charset
10468 * if enc != XML_CHAR_ENCODING_NONE
10469 * plug some encoding conversion routines.
10470 */
10471 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010472 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10473 start[0] = RAW;
10474 start[1] = NXT(1);
10475 start[2] = NXT(2);
10476 start[3] = NXT(3);
10477 enc = xmlDetectCharEncoding(start, 4);
10478 if (enc != XML_CHAR_ENCODING_NONE) {
10479 xmlSwitchEncoding(ctxt, enc);
10480 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010481 }
10482
Owen Taylor3473f882001-02-23 17:55:21 +000010483 /*
10484 * Parse a possible text declaration first
10485 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010486 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010487 xmlParseTextDecl(ctxt);
10488 }
10489
10490 /*
10491 * Doing validity checking on chunk doesn't make sense
10492 */
10493 ctxt->instate = XML_PARSER_CONTENT;
10494 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010495 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010496 ctxt->loadsubset = ctx->loadsubset;
10497 ctxt->depth = ctx->depth + 1;
10498 ctxt->replaceEntities = ctx->replaceEntities;
10499 if (ctxt->validate) {
10500 ctxt->vctxt.error = ctx->vctxt.error;
10501 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010502 } else {
10503 ctxt->vctxt.error = NULL;
10504 ctxt->vctxt.warning = NULL;
10505 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010506 ctxt->vctxt.nodeTab = NULL;
10507 ctxt->vctxt.nodeNr = 0;
10508 ctxt->vctxt.nodeMax = 0;
10509 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010510 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10511 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010512 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10513 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10514 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010515 ctxt->dictNames = ctx->dictNames;
10516 ctxt->attsDefault = ctx->attsDefault;
10517 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010518 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010519
10520 xmlParseContent(ctxt);
10521
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010522 ctx->validate = ctxt->validate;
10523 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010524 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010525 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010526 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010527 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010528 }
10529 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010530 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010531 }
10532
10533 if (!ctxt->wellFormed) {
10534 if (ctxt->errNo == 0)
10535 ret = 1;
10536 else
10537 ret = ctxt->errNo;
10538 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010539 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010540 xmlNodePtr cur;
10541
10542 /*
10543 * Return the newly created nodeset after unlinking it from
10544 * they pseudo parent.
10545 */
10546 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010547 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010548 while (cur != NULL) {
10549 cur->parent = NULL;
10550 cur = cur->next;
10551 }
10552 newDoc->children->children = NULL;
10553 }
10554 ret = 0;
10555 }
10556 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010557 ctxt->dict = NULL;
10558 ctxt->attsDefault = NULL;
10559 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010560 xmlFreeParserCtxt(ctxt);
10561 newDoc->intSubset = NULL;
10562 newDoc->extSubset = NULL;
10563 xmlFreeDoc(newDoc);
10564
10565 return(ret);
10566}
10567
10568/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010569 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010570 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010571 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010572 * @sax: the SAX handler bloc (possibly NULL)
10573 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10574 * @depth: Used for loop detection, use 0
10575 * @URL: the URL for the entity to load
10576 * @ID: the System ID for the entity to load
10577 * @list: the return value for the set of parsed nodes
10578 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010579 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010580 *
10581 * Returns 0 if the entity is well formed, -1 in case of args problem and
10582 * the parser error code otherwise
10583 */
10584
Daniel Veillard7d515752003-09-26 19:12:37 +000010585static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010586xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10587 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010588 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010589 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010590 xmlParserCtxtPtr ctxt;
10591 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010592 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010593 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010594 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010595 xmlChar start[4];
10596 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010597
10598 if (depth > 40) {
10599 return(XML_ERR_ENTITY_LOOP);
10600 }
10601
10602
10603
10604 if (list != NULL)
10605 *list = NULL;
10606 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010607 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010608 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010609 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010610
10611
10612 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010613 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010614 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010615 if (oldctxt != NULL) {
10616 ctxt->_private = oldctxt->_private;
10617 ctxt->loadsubset = oldctxt->loadsubset;
10618 ctxt->validate = oldctxt->validate;
10619 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010620 ctxt->record_info = oldctxt->record_info;
10621 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10622 ctxt->node_seq.length = oldctxt->node_seq.length;
10623 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010624 } else {
10625 /*
10626 * Doing validity checking on chunk without context
10627 * doesn't make sense
10628 */
10629 ctxt->_private = NULL;
10630 ctxt->validate = 0;
10631 ctxt->external = 2;
10632 ctxt->loadsubset = 0;
10633 }
Owen Taylor3473f882001-02-23 17:55:21 +000010634 if (sax != NULL) {
10635 oldsax = ctxt->sax;
10636 ctxt->sax = sax;
10637 if (user_data != NULL)
10638 ctxt->userData = user_data;
10639 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010640 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010641 newDoc = xmlNewDoc(BAD_CAST "1.0");
10642 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010643 ctxt->node_seq.maximum = 0;
10644 ctxt->node_seq.length = 0;
10645 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010646 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010647 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010648 }
10649 if (doc != NULL) {
10650 newDoc->intSubset = doc->intSubset;
10651 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010652 newDoc->dict = doc->dict;
10653 } else if (oldctxt != NULL) {
10654 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010655 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010656 xmlDictReference(newDoc->dict);
10657
Owen Taylor3473f882001-02-23 17:55:21 +000010658 if (doc->URL != NULL) {
10659 newDoc->URL = xmlStrdup(doc->URL);
10660 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010661 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10662 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010663 if (sax != NULL)
10664 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010665 ctxt->node_seq.maximum = 0;
10666 ctxt->node_seq.length = 0;
10667 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010668 xmlFreeParserCtxt(ctxt);
10669 newDoc->intSubset = NULL;
10670 newDoc->extSubset = NULL;
10671 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010672 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010673 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010674 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010675 nodePush(ctxt, newDoc->children);
10676 if (doc == NULL) {
10677 ctxt->myDoc = newDoc;
10678 } else {
10679 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010680 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010681 }
10682
Daniel Veillard87a764e2001-06-20 17:41:10 +000010683 /*
10684 * Get the 4 first bytes and decode the charset
10685 * if enc != XML_CHAR_ENCODING_NONE
10686 * plug some encoding conversion routines.
10687 */
10688 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010689 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10690 start[0] = RAW;
10691 start[1] = NXT(1);
10692 start[2] = NXT(2);
10693 start[3] = NXT(3);
10694 enc = xmlDetectCharEncoding(start, 4);
10695 if (enc != XML_CHAR_ENCODING_NONE) {
10696 xmlSwitchEncoding(ctxt, enc);
10697 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010698 }
10699
Owen Taylor3473f882001-02-23 17:55:21 +000010700 /*
10701 * Parse a possible text declaration first
10702 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010703 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010704 xmlParseTextDecl(ctxt);
10705 }
10706
Owen Taylor3473f882001-02-23 17:55:21 +000010707 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010708 ctxt->depth = depth;
10709
10710 xmlParseContent(ctxt);
10711
Daniel Veillard561b7f82002-03-20 21:55:57 +000010712 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010713 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010714 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010715 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010716 }
10717 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010718 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010719 }
10720
10721 if (!ctxt->wellFormed) {
10722 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010723 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010724 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010725 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010726 } else {
10727 if (list != NULL) {
10728 xmlNodePtr cur;
10729
10730 /*
10731 * Return the newly created nodeset after unlinking it from
10732 * they pseudo parent.
10733 */
10734 cur = newDoc->children->children;
10735 *list = cur;
10736 while (cur != NULL) {
10737 cur->parent = NULL;
10738 cur = cur->next;
10739 }
10740 newDoc->children->children = NULL;
10741 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010742 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010743 }
10744 if (sax != NULL)
10745 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010746 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10747 oldctxt->node_seq.length = ctxt->node_seq.length;
10748 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010749 ctxt->node_seq.maximum = 0;
10750 ctxt->node_seq.length = 0;
10751 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010752 xmlFreeParserCtxt(ctxt);
10753 newDoc->intSubset = NULL;
10754 newDoc->extSubset = NULL;
10755 xmlFreeDoc(newDoc);
10756
10757 return(ret);
10758}
10759
Daniel Veillard81273902003-09-30 00:43:48 +000010760#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010761/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010762 * xmlParseExternalEntity:
10763 * @doc: the document the chunk pertains to
10764 * @sax: the SAX handler bloc (possibly NULL)
10765 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10766 * @depth: Used for loop detection, use 0
10767 * @URL: the URL for the entity to load
10768 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010769 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010770 *
10771 * Parse an external general entity
10772 * An external general parsed entity is well-formed if it matches the
10773 * production labeled extParsedEnt.
10774 *
10775 * [78] extParsedEnt ::= TextDecl? content
10776 *
10777 * Returns 0 if the entity is well formed, -1 in case of args problem and
10778 * the parser error code otherwise
10779 */
10780
10781int
10782xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010783 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010784 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010785 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010786}
10787
10788/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010789 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010790 * @doc: the document the chunk pertains to
10791 * @sax: the SAX handler bloc (possibly NULL)
10792 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10793 * @depth: Used for loop detection, use 0
10794 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010795 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010796 *
10797 * Parse a well-balanced chunk of an XML document
10798 * called by the parser
10799 * The allowed sequence for the Well Balanced Chunk is the one defined by
10800 * the content production in the XML grammar:
10801 *
10802 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10803 *
10804 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10805 * the parser error code otherwise
10806 */
10807
10808int
10809xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010810 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010811 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10812 depth, string, lst, 0 );
10813}
Daniel Veillard81273902003-09-30 00:43:48 +000010814#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010815
10816/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010817 * xmlParseBalancedChunkMemoryInternal:
10818 * @oldctxt: the existing parsing context
10819 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10820 * @user_data: the user data field for the parser context
10821 * @lst: the return value for the set of parsed nodes
10822 *
10823 *
10824 * Parse a well-balanced chunk of an XML document
10825 * called by the parser
10826 * The allowed sequence for the Well Balanced Chunk is the one defined by
10827 * the content production in the XML grammar:
10828 *
10829 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10830 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010831 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10832 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010833 *
10834 * In case recover is set to 1, the nodelist will not be empty even if
10835 * the parsed chunk is not well balanced.
10836 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010837static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010838xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10839 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10840 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010841 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010842 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010843 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010844 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010845 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010846 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010847 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010848
10849 if (oldctxt->depth > 40) {
10850 return(XML_ERR_ENTITY_LOOP);
10851 }
10852
10853
10854 if (lst != NULL)
10855 *lst = NULL;
10856 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010857 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010858
10859 size = xmlStrlen(string);
10860
10861 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010862 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010863 if (user_data != NULL)
10864 ctxt->userData = user_data;
10865 else
10866 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010867 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10868 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010869 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10870 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10871 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010872
10873 oldsax = ctxt->sax;
10874 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010875 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010876 ctxt->replaceEntities = oldctxt->replaceEntities;
10877 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010878
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010879 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010880 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010881 newDoc = xmlNewDoc(BAD_CAST "1.0");
10882 if (newDoc == NULL) {
10883 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010884 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010885 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010886 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010887 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010888 newDoc->dict = ctxt->dict;
10889 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010890 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010891 } else {
10892 ctxt->myDoc = oldctxt->myDoc;
10893 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010894 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010895 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010896 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
10897 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010898 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010899 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010900 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010901 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010902 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010903 }
William M. Brack7b9154b2003-09-27 19:23:50 +000010904 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010905 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010906 ctxt->myDoc->children = NULL;
10907 ctxt->myDoc->last = NULL;
10908 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010909 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010910 ctxt->instate = XML_PARSER_CONTENT;
10911 ctxt->depth = oldctxt->depth + 1;
10912
Daniel Veillard328f48c2002-11-15 15:24:34 +000010913 ctxt->validate = 0;
10914 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010915 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10916 /*
10917 * ID/IDREF registration will be done in xmlValidateElement below
10918 */
10919 ctxt->loadsubset |= XML_SKIP_IDS;
10920 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010921 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010922 ctxt->attsDefault = oldctxt->attsDefault;
10923 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010924
Daniel Veillard68e9e742002-11-16 15:35:11 +000010925 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010926 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010927 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010928 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010929 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010930 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010931 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010932 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010933 }
10934
10935 if (!ctxt->wellFormed) {
10936 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010937 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010938 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010939 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010940 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010941 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010942 }
10943
William M. Brack7b9154b2003-09-27 19:23:50 +000010944 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010945 xmlNodePtr cur;
10946
10947 /*
10948 * Return the newly created nodeset after unlinking it from
10949 * they pseudo parent.
10950 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010951 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010952 *lst = cur;
10953 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010954#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010955 if (oldctxt->validate && oldctxt->wellFormed &&
10956 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10957 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10958 oldctxt->myDoc, cur);
10959 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010960#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010961 cur->parent = NULL;
10962 cur = cur->next;
10963 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010964 ctxt->myDoc->children->children = NULL;
10965 }
10966 if (ctxt->myDoc != NULL) {
10967 xmlFreeNode(ctxt->myDoc->children);
10968 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010969 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010970 }
10971
10972 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010973 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010974 ctxt->attsDefault = NULL;
10975 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010976 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010977 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010978 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010979 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010980
10981 return(ret);
10982}
10983
Daniel Veillard29b17482004-08-16 00:39:03 +000010984/**
10985 * xmlParseInNodeContext:
10986 * @node: the context node
10987 * @data: the input string
10988 * @datalen: the input string length in bytes
10989 * @options: a combination of xmlParserOption
10990 * @lst: the return value for the set of parsed nodes
10991 *
10992 * Parse a well-balanced chunk of an XML document
10993 * within the context (DTD, namespaces, etc ...) of the given node.
10994 *
10995 * The allowed sequence for the data is a Well Balanced Chunk defined by
10996 * the content production in the XML grammar:
10997 *
10998 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10999 *
11000 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11001 * error code otherwise
11002 */
11003xmlParserErrors
11004xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11005 int options, xmlNodePtr *lst) {
11006#ifdef SAX2
11007 xmlParserCtxtPtr ctxt;
11008 xmlDocPtr doc = NULL;
11009 xmlNodePtr fake, cur;
11010 int nsnr = 0;
11011
11012 xmlParserErrors ret = XML_ERR_OK;
11013
11014 /*
11015 * check all input parameters, grab the document
11016 */
11017 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11018 return(XML_ERR_INTERNAL_ERROR);
11019 switch (node->type) {
11020 case XML_ELEMENT_NODE:
11021 case XML_ATTRIBUTE_NODE:
11022 case XML_TEXT_NODE:
11023 case XML_CDATA_SECTION_NODE:
11024 case XML_ENTITY_REF_NODE:
11025 case XML_PI_NODE:
11026 case XML_COMMENT_NODE:
11027 case XML_DOCUMENT_NODE:
11028 case XML_HTML_DOCUMENT_NODE:
11029 break;
11030 default:
11031 return(XML_ERR_INTERNAL_ERROR);
11032
11033 }
11034 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11035 (node->type != XML_DOCUMENT_NODE) &&
11036 (node->type != XML_HTML_DOCUMENT_NODE))
11037 node = node->parent;
11038 if (node == NULL)
11039 return(XML_ERR_INTERNAL_ERROR);
11040 if (node->type == XML_ELEMENT_NODE)
11041 doc = node->doc;
11042 else
11043 doc = (xmlDocPtr) node;
11044 if (doc == NULL)
11045 return(XML_ERR_INTERNAL_ERROR);
11046
11047 /*
11048 * allocate a context and set-up everything not related to the
11049 * node position in the tree
11050 */
11051 if (doc->type == XML_DOCUMENT_NODE)
11052 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11053#ifdef LIBXML_HTML_ENABLED
11054 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11055 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11056#endif
11057 else
11058 return(XML_ERR_INTERNAL_ERROR);
11059
11060 if (ctxt == NULL)
11061 return(XML_ERR_NO_MEMORY);
11062 fake = xmlNewComment(NULL);
11063 if (fake == NULL) {
11064 xmlFreeParserCtxt(ctxt);
11065 return(XML_ERR_NO_MEMORY);
11066 }
11067 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011068
11069 /*
11070 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11071 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11072 * we must wait until the last moment to free the original one.
11073 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011074 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011075 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011076 xmlDictFree(ctxt->dict);
11077 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011078 } else
11079 options |= XML_PARSE_NODICT;
11080
11081 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011082 xmlDetectSAX2(ctxt);
11083 ctxt->myDoc = doc;
11084
11085 if (node->type == XML_ELEMENT_NODE) {
11086 nodePush(ctxt, node);
11087 /*
11088 * initialize the SAX2 namespaces stack
11089 */
11090 cur = node;
11091 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11092 xmlNsPtr ns = cur->nsDef;
11093 const xmlChar *iprefix, *ihref;
11094
11095 while (ns != NULL) {
11096 if (ctxt->dict) {
11097 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11098 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11099 } else {
11100 iprefix = ns->prefix;
11101 ihref = ns->href;
11102 }
11103
11104 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11105 nsPush(ctxt, iprefix, ihref);
11106 nsnr++;
11107 }
11108 ns = ns->next;
11109 }
11110 cur = cur->parent;
11111 }
11112 ctxt->instate = XML_PARSER_CONTENT;
11113 }
11114
11115 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11116 /*
11117 * ID/IDREF registration will be done in xmlValidateElement below
11118 */
11119 ctxt->loadsubset |= XML_SKIP_IDS;
11120 }
11121
11122 xmlParseContent(ctxt);
11123 nsPop(ctxt, nsnr);
11124 if ((RAW == '<') && (NXT(1) == '/')) {
11125 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11126 } else if (RAW != 0) {
11127 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11128 }
11129 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11130 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11131 ctxt->wellFormed = 0;
11132 }
11133
11134 if (!ctxt->wellFormed) {
11135 if (ctxt->errNo == 0)
11136 ret = XML_ERR_INTERNAL_ERROR;
11137 else
11138 ret = (xmlParserErrors)ctxt->errNo;
11139 } else {
11140 ret = XML_ERR_OK;
11141 }
11142
11143 /*
11144 * Return the newly created nodeset after unlinking it from
11145 * the pseudo sibling.
11146 */
11147
11148 cur = fake->next;
11149 fake->next = NULL;
11150 node->last = fake;
11151
11152 if (cur != NULL) {
11153 cur->prev = NULL;
11154 }
11155
11156 *lst = cur;
11157
11158 while (cur != NULL) {
11159 cur->parent = NULL;
11160 cur = cur->next;
11161 }
11162
11163 xmlUnlinkNode(fake);
11164 xmlFreeNode(fake);
11165
11166
11167 if (ret != XML_ERR_OK) {
11168 xmlFreeNodeList(*lst);
11169 *lst = NULL;
11170 }
William M. Brackc3f81342004-10-03 01:22:44 +000011171
William M. Brackb7b54de2004-10-06 16:38:01 +000011172 if (doc->dict != NULL)
11173 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011174 xmlFreeParserCtxt(ctxt);
11175
11176 return(ret);
11177#else /* !SAX2 */
11178 return(XML_ERR_INTERNAL_ERROR);
11179#endif
11180}
11181
Daniel Veillard81273902003-09-30 00:43:48 +000011182#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011183/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011184 * xmlParseBalancedChunkMemoryRecover:
11185 * @doc: the document the chunk pertains to
11186 * @sax: the SAX handler bloc (possibly NULL)
11187 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11188 * @depth: Used for loop detection, use 0
11189 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11190 * @lst: the return value for the set of parsed nodes
11191 * @recover: return nodes even if the data is broken (use 0)
11192 *
11193 *
11194 * Parse a well-balanced chunk of an XML document
11195 * called by the parser
11196 * The allowed sequence for the Well Balanced Chunk is the one defined by
11197 * the content production in the XML grammar:
11198 *
11199 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11200 *
11201 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11202 * the parser error code otherwise
11203 *
11204 * In case recover is set to 1, the nodelist will not be empty even if
11205 * the parsed chunk is not well balanced.
11206 */
11207int
11208xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11209 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11210 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011211 xmlParserCtxtPtr ctxt;
11212 xmlDocPtr newDoc;
11213 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011214 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011215 int size;
11216 int ret = 0;
11217
11218 if (depth > 40) {
11219 return(XML_ERR_ENTITY_LOOP);
11220 }
11221
11222
Daniel Veillardcda96922001-08-21 10:56:31 +000011223 if (lst != NULL)
11224 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011225 if (string == NULL)
11226 return(-1);
11227
11228 size = xmlStrlen(string);
11229
11230 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11231 if (ctxt == NULL) return(-1);
11232 ctxt->userData = ctxt;
11233 if (sax != NULL) {
11234 oldsax = ctxt->sax;
11235 ctxt->sax = sax;
11236 if (user_data != NULL)
11237 ctxt->userData = user_data;
11238 }
11239 newDoc = xmlNewDoc(BAD_CAST "1.0");
11240 if (newDoc == NULL) {
11241 xmlFreeParserCtxt(ctxt);
11242 return(-1);
11243 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011244 if ((doc != NULL) && (doc->dict != NULL)) {
11245 xmlDictFree(ctxt->dict);
11246 ctxt->dict = doc->dict;
11247 xmlDictReference(ctxt->dict);
11248 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11249 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11250 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11251 ctxt->dictNames = 1;
11252 } else {
11253 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11254 }
Owen Taylor3473f882001-02-23 17:55:21 +000011255 if (doc != NULL) {
11256 newDoc->intSubset = doc->intSubset;
11257 newDoc->extSubset = doc->extSubset;
11258 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011259 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11260 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011261 if (sax != NULL)
11262 ctxt->sax = oldsax;
11263 xmlFreeParserCtxt(ctxt);
11264 newDoc->intSubset = NULL;
11265 newDoc->extSubset = NULL;
11266 xmlFreeDoc(newDoc);
11267 return(-1);
11268 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011269 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11270 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011271 if (doc == NULL) {
11272 ctxt->myDoc = newDoc;
11273 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011274 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011275 newDoc->children->doc = doc;
11276 }
11277 ctxt->instate = XML_PARSER_CONTENT;
11278 ctxt->depth = depth;
11279
11280 /*
11281 * Doing validity checking on chunk doesn't make sense
11282 */
11283 ctxt->validate = 0;
11284 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011285 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011286
Daniel Veillardb39bc392002-10-26 19:29:51 +000011287 if ( doc != NULL ){
11288 content = doc->children;
11289 doc->children = NULL;
11290 xmlParseContent(ctxt);
11291 doc->children = content;
11292 }
11293 else {
11294 xmlParseContent(ctxt);
11295 }
Owen Taylor3473f882001-02-23 17:55:21 +000011296 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011297 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011298 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011299 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011300 }
11301 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011302 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011303 }
11304
11305 if (!ctxt->wellFormed) {
11306 if (ctxt->errNo == 0)
11307 ret = 1;
11308 else
11309 ret = ctxt->errNo;
11310 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011311 ret = 0;
11312 }
11313
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011314 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11315 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011316
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011317 /*
11318 * Return the newly created nodeset after unlinking it from
11319 * they pseudo parent.
11320 */
11321 cur = newDoc->children->children;
11322 *lst = cur;
11323 while (cur != NULL) {
11324 xmlSetTreeDoc(cur, doc);
11325 cur->parent = NULL;
11326 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011327 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011328 newDoc->children->children = NULL;
11329 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011330
Owen Taylor3473f882001-02-23 17:55:21 +000011331 if (sax != NULL)
11332 ctxt->sax = oldsax;
11333 xmlFreeParserCtxt(ctxt);
11334 newDoc->intSubset = NULL;
11335 newDoc->extSubset = NULL;
11336 xmlFreeDoc(newDoc);
11337
11338 return(ret);
11339}
11340
11341/**
11342 * xmlSAXParseEntity:
11343 * @sax: the SAX handler block
11344 * @filename: the filename
11345 *
11346 * parse an XML external entity out of context and build a tree.
11347 * It use the given SAX function block to handle the parsing callback.
11348 * If sax is NULL, fallback to the default DOM tree building routines.
11349 *
11350 * [78] extParsedEnt ::= TextDecl? content
11351 *
11352 * This correspond to a "Well Balanced" chunk
11353 *
11354 * Returns the resulting document tree
11355 */
11356
11357xmlDocPtr
11358xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11359 xmlDocPtr ret;
11360 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011361
11362 ctxt = xmlCreateFileParserCtxt(filename);
11363 if (ctxt == NULL) {
11364 return(NULL);
11365 }
11366 if (sax != NULL) {
11367 if (ctxt->sax != NULL)
11368 xmlFree(ctxt->sax);
11369 ctxt->sax = sax;
11370 ctxt->userData = NULL;
11371 }
11372
Owen Taylor3473f882001-02-23 17:55:21 +000011373 xmlParseExtParsedEnt(ctxt);
11374
11375 if (ctxt->wellFormed)
11376 ret = ctxt->myDoc;
11377 else {
11378 ret = NULL;
11379 xmlFreeDoc(ctxt->myDoc);
11380 ctxt->myDoc = NULL;
11381 }
11382 if (sax != NULL)
11383 ctxt->sax = NULL;
11384 xmlFreeParserCtxt(ctxt);
11385
11386 return(ret);
11387}
11388
11389/**
11390 * xmlParseEntity:
11391 * @filename: the filename
11392 *
11393 * parse an XML external entity out of context and build a tree.
11394 *
11395 * [78] extParsedEnt ::= TextDecl? content
11396 *
11397 * This correspond to a "Well Balanced" chunk
11398 *
11399 * Returns the resulting document tree
11400 */
11401
11402xmlDocPtr
11403xmlParseEntity(const char *filename) {
11404 return(xmlSAXParseEntity(NULL, filename));
11405}
Daniel Veillard81273902003-09-30 00:43:48 +000011406#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011407
11408/**
11409 * xmlCreateEntityParserCtxt:
11410 * @URL: the entity URL
11411 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011412 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011413 *
11414 * Create a parser context for an external entity
11415 * Automatic support for ZLIB/Compress compressed document is provided
11416 * by default if found at compile-time.
11417 *
11418 * Returns the new parser context or NULL
11419 */
11420xmlParserCtxtPtr
11421xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11422 const xmlChar *base) {
11423 xmlParserCtxtPtr ctxt;
11424 xmlParserInputPtr inputStream;
11425 char *directory = NULL;
11426 xmlChar *uri;
11427
11428 ctxt = xmlNewParserCtxt();
11429 if (ctxt == NULL) {
11430 return(NULL);
11431 }
11432
11433 uri = xmlBuildURI(URL, base);
11434
11435 if (uri == NULL) {
11436 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11437 if (inputStream == NULL) {
11438 xmlFreeParserCtxt(ctxt);
11439 return(NULL);
11440 }
11441
11442 inputPush(ctxt, inputStream);
11443
11444 if ((ctxt->directory == NULL) && (directory == NULL))
11445 directory = xmlParserGetDirectory((char *)URL);
11446 if ((ctxt->directory == NULL) && (directory != NULL))
11447 ctxt->directory = directory;
11448 } else {
11449 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11450 if (inputStream == NULL) {
11451 xmlFree(uri);
11452 xmlFreeParserCtxt(ctxt);
11453 return(NULL);
11454 }
11455
11456 inputPush(ctxt, inputStream);
11457
11458 if ((ctxt->directory == NULL) && (directory == NULL))
11459 directory = xmlParserGetDirectory((char *)uri);
11460 if ((ctxt->directory == NULL) && (directory != NULL))
11461 ctxt->directory = directory;
11462 xmlFree(uri);
11463 }
Owen Taylor3473f882001-02-23 17:55:21 +000011464 return(ctxt);
11465}
11466
11467/************************************************************************
11468 * *
11469 * Front ends when parsing from a file *
11470 * *
11471 ************************************************************************/
11472
11473/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011474 * xmlCreateURLParserCtxt:
11475 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011476 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011477 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011478 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011479 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011480 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011481 *
11482 * Returns the new parser context or NULL
11483 */
11484xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011485xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011486{
11487 xmlParserCtxtPtr ctxt;
11488 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011489 char *directory = NULL;
11490
Owen Taylor3473f882001-02-23 17:55:21 +000011491 ctxt = xmlNewParserCtxt();
11492 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011493 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011494 return(NULL);
11495 }
11496
Daniel Veillard61b93382003-11-03 14:28:31 +000011497 if (options != 0)
11498 xmlCtxtUseOptions(ctxt, options);
Igor Zlatkovicce076162003-02-23 13:39:39 +000011499
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011500 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011501 if (inputStream == NULL) {
11502 xmlFreeParserCtxt(ctxt);
11503 return(NULL);
11504 }
11505
Owen Taylor3473f882001-02-23 17:55:21 +000011506 inputPush(ctxt, inputStream);
11507 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011508 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011509 if ((ctxt->directory == NULL) && (directory != NULL))
11510 ctxt->directory = directory;
11511
11512 return(ctxt);
11513}
11514
Daniel Veillard61b93382003-11-03 14:28:31 +000011515/**
11516 * xmlCreateFileParserCtxt:
11517 * @filename: the filename
11518 *
11519 * Create a parser context for a file content.
11520 * Automatic support for ZLIB/Compress compressed document is provided
11521 * by default if found at compile-time.
11522 *
11523 * Returns the new parser context or NULL
11524 */
11525xmlParserCtxtPtr
11526xmlCreateFileParserCtxt(const char *filename)
11527{
11528 return(xmlCreateURLParserCtxt(filename, 0));
11529}
11530
Daniel Veillard81273902003-09-30 00:43:48 +000011531#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011532/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011533 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011534 * @sax: the SAX handler block
11535 * @filename: the filename
11536 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11537 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011538 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011539 *
11540 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11541 * compressed document is provided by default if found at compile-time.
11542 * It use the given SAX function block to handle the parsing callback.
11543 * If sax is NULL, fallback to the default DOM tree building routines.
11544 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011545 * User data (void *) is stored within the parser context in the
11546 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011547 *
Owen Taylor3473f882001-02-23 17:55:21 +000011548 * Returns the resulting document tree
11549 */
11550
11551xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011552xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11553 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011554 xmlDocPtr ret;
11555 xmlParserCtxtPtr ctxt;
11556 char *directory = NULL;
11557
Daniel Veillard635ef722001-10-29 11:48:19 +000011558 xmlInitParser();
11559
Owen Taylor3473f882001-02-23 17:55:21 +000011560 ctxt = xmlCreateFileParserCtxt(filename);
11561 if (ctxt == NULL) {
11562 return(NULL);
11563 }
11564 if (sax != NULL) {
11565 if (ctxt->sax != NULL)
11566 xmlFree(ctxt->sax);
11567 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011568 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011569 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011570 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011571 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011572 }
Owen Taylor3473f882001-02-23 17:55:21 +000011573
11574 if ((ctxt->directory == NULL) && (directory == NULL))
11575 directory = xmlParserGetDirectory(filename);
11576 if ((ctxt->directory == NULL) && (directory != NULL))
11577 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11578
Daniel Veillarddad3f682002-11-17 16:47:27 +000011579 ctxt->recovery = recovery;
11580
Owen Taylor3473f882001-02-23 17:55:21 +000011581 xmlParseDocument(ctxt);
11582
William M. Brackc07329e2003-09-08 01:57:30 +000011583 if ((ctxt->wellFormed) || recovery) {
11584 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011585 if (ret != NULL) {
11586 if (ctxt->input->buf->compressed > 0)
11587 ret->compression = 9;
11588 else
11589 ret->compression = ctxt->input->buf->compressed;
11590 }
William M. Brackc07329e2003-09-08 01:57:30 +000011591 }
Owen Taylor3473f882001-02-23 17:55:21 +000011592 else {
11593 ret = NULL;
11594 xmlFreeDoc(ctxt->myDoc);
11595 ctxt->myDoc = NULL;
11596 }
11597 if (sax != NULL)
11598 ctxt->sax = NULL;
11599 xmlFreeParserCtxt(ctxt);
11600
11601 return(ret);
11602}
11603
11604/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011605 * xmlSAXParseFile:
11606 * @sax: the SAX handler block
11607 * @filename: the filename
11608 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11609 * documents
11610 *
11611 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11612 * compressed document is provided by default if found at compile-time.
11613 * It use the given SAX function block to handle the parsing callback.
11614 * If sax is NULL, fallback to the default DOM tree building routines.
11615 *
11616 * Returns the resulting document tree
11617 */
11618
11619xmlDocPtr
11620xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11621 int recovery) {
11622 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11623}
11624
11625/**
Owen Taylor3473f882001-02-23 17:55:21 +000011626 * xmlRecoverDoc:
11627 * @cur: a pointer to an array of xmlChar
11628 *
11629 * parse an XML in-memory document and build a tree.
11630 * In the case the document is not Well Formed, a tree is built anyway
11631 *
11632 * Returns the resulting document tree
11633 */
11634
11635xmlDocPtr
11636xmlRecoverDoc(xmlChar *cur) {
11637 return(xmlSAXParseDoc(NULL, cur, 1));
11638}
11639
11640/**
11641 * xmlParseFile:
11642 * @filename: the filename
11643 *
11644 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11645 * compressed document is provided by default if found at compile-time.
11646 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011647 * Returns the resulting document tree if the file was wellformed,
11648 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011649 */
11650
11651xmlDocPtr
11652xmlParseFile(const char *filename) {
11653 return(xmlSAXParseFile(NULL, filename, 0));
11654}
11655
11656/**
11657 * xmlRecoverFile:
11658 * @filename: the filename
11659 *
11660 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11661 * compressed document is provided by default if found at compile-time.
11662 * In the case the document is not Well Formed, a tree is built anyway
11663 *
11664 * Returns the resulting document tree
11665 */
11666
11667xmlDocPtr
11668xmlRecoverFile(const char *filename) {
11669 return(xmlSAXParseFile(NULL, filename, 1));
11670}
11671
11672
11673/**
11674 * xmlSetupParserForBuffer:
11675 * @ctxt: an XML parser context
11676 * @buffer: a xmlChar * buffer
11677 * @filename: a file name
11678 *
11679 * Setup the parser context to parse a new buffer; Clears any prior
11680 * contents from the parser context. The buffer parameter must not be
11681 * NULL, but the filename parameter can be
11682 */
11683void
11684xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11685 const char* filename)
11686{
11687 xmlParserInputPtr input;
11688
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011689 if ((ctxt == NULL) || (buffer == NULL))
11690 return;
11691
Owen Taylor3473f882001-02-23 17:55:21 +000011692 input = xmlNewInputStream(ctxt);
11693 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011694 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011695 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011696 return;
11697 }
11698
11699 xmlClearParserCtxt(ctxt);
11700 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011701 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011702 input->base = buffer;
11703 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011704 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011705 inputPush(ctxt, input);
11706}
11707
11708/**
11709 * xmlSAXUserParseFile:
11710 * @sax: a SAX handler
11711 * @user_data: The user data returned on SAX callbacks
11712 * @filename: a file name
11713 *
11714 * parse an XML file and call the given SAX handler routines.
11715 * Automatic support for ZLIB/Compress compressed document is provided
11716 *
11717 * Returns 0 in case of success or a error number otherwise
11718 */
11719int
11720xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11721 const char *filename) {
11722 int ret = 0;
11723 xmlParserCtxtPtr ctxt;
11724
11725 ctxt = xmlCreateFileParserCtxt(filename);
11726 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011727#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011728 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011729#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011730 xmlFree(ctxt->sax);
11731 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011732 xmlDetectSAX2(ctxt);
11733
Owen Taylor3473f882001-02-23 17:55:21 +000011734 if (user_data != NULL)
11735 ctxt->userData = user_data;
11736
11737 xmlParseDocument(ctxt);
11738
11739 if (ctxt->wellFormed)
11740 ret = 0;
11741 else {
11742 if (ctxt->errNo != 0)
11743 ret = ctxt->errNo;
11744 else
11745 ret = -1;
11746 }
11747 if (sax != NULL)
11748 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000011749 if (ctxt->myDoc != NULL) {
11750 xmlFreeDoc(ctxt->myDoc);
11751 ctxt->myDoc = NULL;
11752 }
Owen Taylor3473f882001-02-23 17:55:21 +000011753 xmlFreeParserCtxt(ctxt);
11754
11755 return ret;
11756}
Daniel Veillard81273902003-09-30 00:43:48 +000011757#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011758
11759/************************************************************************
11760 * *
11761 * Front ends when parsing from memory *
11762 * *
11763 ************************************************************************/
11764
11765/**
11766 * xmlCreateMemoryParserCtxt:
11767 * @buffer: a pointer to a char array
11768 * @size: the size of the array
11769 *
11770 * Create a parser context for an XML in-memory document.
11771 *
11772 * Returns the new parser context or NULL
11773 */
11774xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011775xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011776 xmlParserCtxtPtr ctxt;
11777 xmlParserInputPtr input;
11778 xmlParserInputBufferPtr buf;
11779
11780 if (buffer == NULL)
11781 return(NULL);
11782 if (size <= 0)
11783 return(NULL);
11784
11785 ctxt = xmlNewParserCtxt();
11786 if (ctxt == NULL)
11787 return(NULL);
11788
Daniel Veillard53350552003-09-18 13:35:51 +000011789 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011790 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011791 if (buf == NULL) {
11792 xmlFreeParserCtxt(ctxt);
11793 return(NULL);
11794 }
Owen Taylor3473f882001-02-23 17:55:21 +000011795
11796 input = xmlNewInputStream(ctxt);
11797 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011798 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011799 xmlFreeParserCtxt(ctxt);
11800 return(NULL);
11801 }
11802
11803 input->filename = NULL;
11804 input->buf = buf;
11805 input->base = input->buf->buffer->content;
11806 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011807 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011808
11809 inputPush(ctxt, input);
11810 return(ctxt);
11811}
11812
Daniel Veillard81273902003-09-30 00:43:48 +000011813#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011814/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011815 * xmlSAXParseMemoryWithData:
11816 * @sax: the SAX handler block
11817 * @buffer: an pointer to a char array
11818 * @size: the size of the array
11819 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11820 * documents
11821 * @data: the userdata
11822 *
11823 * parse an XML in-memory block and use the given SAX function block
11824 * to handle the parsing callback. If sax is NULL, fallback to the default
11825 * DOM tree building routines.
11826 *
11827 * User data (void *) is stored within the parser context in the
11828 * context's _private member, so it is available nearly everywhere in libxml
11829 *
11830 * Returns the resulting document tree
11831 */
11832
11833xmlDocPtr
11834xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11835 int size, int recovery, void *data) {
11836 xmlDocPtr ret;
11837 xmlParserCtxtPtr ctxt;
11838
11839 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11840 if (ctxt == NULL) return(NULL);
11841 if (sax != NULL) {
11842 if (ctxt->sax != NULL)
11843 xmlFree(ctxt->sax);
11844 ctxt->sax = sax;
11845 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011846 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011847 if (data!=NULL) {
11848 ctxt->_private=data;
11849 }
11850
Daniel Veillardadba5f12003-04-04 16:09:01 +000011851 ctxt->recovery = recovery;
11852
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011853 xmlParseDocument(ctxt);
11854
11855 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11856 else {
11857 ret = NULL;
11858 xmlFreeDoc(ctxt->myDoc);
11859 ctxt->myDoc = NULL;
11860 }
11861 if (sax != NULL)
11862 ctxt->sax = NULL;
11863 xmlFreeParserCtxt(ctxt);
11864
11865 return(ret);
11866}
11867
11868/**
Owen Taylor3473f882001-02-23 17:55:21 +000011869 * xmlSAXParseMemory:
11870 * @sax: the SAX handler block
11871 * @buffer: an pointer to a char array
11872 * @size: the size of the array
11873 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11874 * documents
11875 *
11876 * parse an XML in-memory block and use the given SAX function block
11877 * to handle the parsing callback. If sax is NULL, fallback to the default
11878 * DOM tree building routines.
11879 *
11880 * Returns the resulting document tree
11881 */
11882xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011883xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11884 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011885 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011886}
11887
11888/**
11889 * xmlParseMemory:
11890 * @buffer: an pointer to a char array
11891 * @size: the size of the array
11892 *
11893 * parse an XML in-memory block and build a tree.
11894 *
11895 * Returns the resulting document tree
11896 */
11897
Daniel Veillard50822cb2001-07-26 20:05:51 +000011898xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011899 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11900}
11901
11902/**
11903 * xmlRecoverMemory:
11904 * @buffer: an pointer to a char array
11905 * @size: the size of the array
11906 *
11907 * parse an XML in-memory block and build a tree.
11908 * In the case the document is not Well Formed, a tree is built anyway
11909 *
11910 * Returns the resulting document tree
11911 */
11912
Daniel Veillard50822cb2001-07-26 20:05:51 +000011913xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011914 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11915}
11916
11917/**
11918 * xmlSAXUserParseMemory:
11919 * @sax: a SAX handler
11920 * @user_data: The user data returned on SAX callbacks
11921 * @buffer: an in-memory XML document input
11922 * @size: the length of the XML document in bytes
11923 *
11924 * A better SAX parsing routine.
11925 * parse an XML in-memory buffer and call the given SAX handler routines.
11926 *
11927 * Returns 0 in case of success or a error number otherwise
11928 */
11929int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011930 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011931 int ret = 0;
11932 xmlParserCtxtPtr ctxt;
11933 xmlSAXHandlerPtr oldsax = NULL;
11934
Daniel Veillard9e923512002-08-14 08:48:52 +000011935 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011936 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11937 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011938 oldsax = ctxt->sax;
11939 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011940 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011941 if (user_data != NULL)
11942 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011943
11944 xmlParseDocument(ctxt);
11945
11946 if (ctxt->wellFormed)
11947 ret = 0;
11948 else {
11949 if (ctxt->errNo != 0)
11950 ret = ctxt->errNo;
11951 else
11952 ret = -1;
11953 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011954 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000011955 if (ctxt->myDoc != NULL) {
11956 xmlFreeDoc(ctxt->myDoc);
11957 ctxt->myDoc = NULL;
11958 }
Owen Taylor3473f882001-02-23 17:55:21 +000011959 xmlFreeParserCtxt(ctxt);
11960
11961 return ret;
11962}
Daniel Veillard81273902003-09-30 00:43:48 +000011963#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011964
11965/**
11966 * xmlCreateDocParserCtxt:
11967 * @cur: a pointer to an array of xmlChar
11968 *
11969 * Creates a parser context for an XML in-memory document.
11970 *
11971 * Returns the new parser context or NULL
11972 */
11973xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011974xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011975 int len;
11976
11977 if (cur == NULL)
11978 return(NULL);
11979 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011980 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011981}
11982
Daniel Veillard81273902003-09-30 00:43:48 +000011983#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011984/**
11985 * xmlSAXParseDoc:
11986 * @sax: the SAX handler block
11987 * @cur: a pointer to an array of xmlChar
11988 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11989 * documents
11990 *
11991 * parse an XML in-memory document and build a tree.
11992 * It use the given SAX function block to handle the parsing callback.
11993 * If sax is NULL, fallback to the default DOM tree building routines.
11994 *
11995 * Returns the resulting document tree
11996 */
11997
11998xmlDocPtr
11999xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
12000 xmlDocPtr ret;
12001 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012002 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012003
Daniel Veillard38936062004-11-04 17:45:11 +000012004 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012005
12006
12007 ctxt = xmlCreateDocParserCtxt(cur);
12008 if (ctxt == NULL) return(NULL);
12009 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012010 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012011 ctxt->sax = sax;
12012 ctxt->userData = NULL;
12013 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012014 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012015
12016 xmlParseDocument(ctxt);
12017 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12018 else {
12019 ret = NULL;
12020 xmlFreeDoc(ctxt->myDoc);
12021 ctxt->myDoc = NULL;
12022 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012023 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012024 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012025 xmlFreeParserCtxt(ctxt);
12026
12027 return(ret);
12028}
12029
12030/**
12031 * xmlParseDoc:
12032 * @cur: a pointer to an array of xmlChar
12033 *
12034 * parse an XML in-memory document and build a tree.
12035 *
12036 * Returns the resulting document tree
12037 */
12038
12039xmlDocPtr
12040xmlParseDoc(xmlChar *cur) {
12041 return(xmlSAXParseDoc(NULL, cur, 0));
12042}
Daniel Veillard81273902003-09-30 00:43:48 +000012043#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012044
Daniel Veillard81273902003-09-30 00:43:48 +000012045#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012046/************************************************************************
12047 * *
12048 * Specific function to keep track of entities references *
12049 * and used by the XSLT debugger *
12050 * *
12051 ************************************************************************/
12052
12053static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12054
12055/**
12056 * xmlAddEntityReference:
12057 * @ent : A valid entity
12058 * @firstNode : A valid first node for children of entity
12059 * @lastNode : A valid last node of children entity
12060 *
12061 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12062 */
12063static void
12064xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12065 xmlNodePtr lastNode)
12066{
12067 if (xmlEntityRefFunc != NULL) {
12068 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12069 }
12070}
12071
12072
12073/**
12074 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012075 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012076 *
12077 * Set the function to call call back when a xml reference has been made
12078 */
12079void
12080xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12081{
12082 xmlEntityRefFunc = func;
12083}
Daniel Veillard81273902003-09-30 00:43:48 +000012084#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012085
12086/************************************************************************
12087 * *
12088 * Miscellaneous *
12089 * *
12090 ************************************************************************/
12091
12092#ifdef LIBXML_XPATH_ENABLED
12093#include <libxml/xpath.h>
12094#endif
12095
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012096extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012097static int xmlParserInitialized = 0;
12098
12099/**
12100 * xmlInitParser:
12101 *
12102 * Initialization function for the XML parser.
12103 * This is not reentrant. Call once before processing in case of
12104 * use in multithreaded programs.
12105 */
12106
12107void
12108xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012109 if (xmlParserInitialized != 0)
12110 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012111
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012112 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12113 (xmlGenericError == NULL))
12114 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012115 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012116 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012117 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012118 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012119 xmlDefaultSAXHandlerInit();
12120 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012121#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012122 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012123#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012124#ifdef LIBXML_HTML_ENABLED
12125 htmlInitAutoClose();
12126 htmlDefaultSAXHandlerInit();
12127#endif
12128#ifdef LIBXML_XPATH_ENABLED
12129 xmlXPathInit();
12130#endif
12131 xmlParserInitialized = 1;
12132}
12133
12134/**
12135 * xmlCleanupParser:
12136 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012137 * Cleanup function for the XML library. It tries to reclaim all
12138 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012139 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012140 * function should not prevent reusing the library but one should
12141 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012142 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012143 */
12144
12145void
12146xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012147 if (!xmlParserInitialized)
12148 return;
12149
Owen Taylor3473f882001-02-23 17:55:21 +000012150 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012151#ifdef LIBXML_CATALOG_ENABLED
12152 xmlCatalogCleanup();
12153#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012154 xmlCleanupInputCallbacks();
12155#ifdef LIBXML_OUTPUT_ENABLED
12156 xmlCleanupOutputCallbacks();
12157#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012158#ifdef LIBXML_SCHEMAS_ENABLED
12159 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012160 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012161#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012162 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012163 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012164 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012165 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012166 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012167}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012168
12169/************************************************************************
12170 * *
12171 * New set (2.6.0) of simpler and more flexible APIs *
12172 * *
12173 ************************************************************************/
12174
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside if/else); callers supply the
 * trailing semicolon. Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
12186
12187/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012188 * xmlCtxtReset:
12189 * @ctxt: an XML parser context
12190 *
12191 * Reset a parser context
12192 */
12193void
12194xmlCtxtReset(xmlParserCtxtPtr ctxt)
12195{
12196 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012197 xmlDictPtr dict;
12198
12199 if (ctxt == NULL)
12200 return;
12201
12202 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012203
12204 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12205 xmlFreeInputStream(input);
12206 }
12207 ctxt->inputNr = 0;
12208 ctxt->input = NULL;
12209
12210 ctxt->spaceNr = 0;
12211 ctxt->spaceTab[0] = -1;
12212 ctxt->space = &ctxt->spaceTab[0];
12213
12214
12215 ctxt->nodeNr = 0;
12216 ctxt->node = NULL;
12217
12218 ctxt->nameNr = 0;
12219 ctxt->name = NULL;
12220
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012221 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012222 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012223 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012224 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012225 DICT_FREE(ctxt->directory);
12226 ctxt->directory = NULL;
12227 DICT_FREE(ctxt->extSubURI);
12228 ctxt->extSubURI = NULL;
12229 DICT_FREE(ctxt->extSubSystem);
12230 ctxt->extSubSystem = NULL;
12231 if (ctxt->myDoc != NULL)
12232 xmlFreeDoc(ctxt->myDoc);
12233 ctxt->myDoc = NULL;
12234
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012235 ctxt->standalone = -1;
12236 ctxt->hasExternalSubset = 0;
12237 ctxt->hasPErefs = 0;
12238 ctxt->html = 0;
12239 ctxt->external = 0;
12240 ctxt->instate = XML_PARSER_START;
12241 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012242
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012243 ctxt->wellFormed = 1;
12244 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012245 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012246 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012247#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012248 ctxt->vctxt.userData = ctxt;
12249 ctxt->vctxt.error = xmlParserValidityError;
12250 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012251#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012252 ctxt->record_info = 0;
12253 ctxt->nbChars = 0;
12254 ctxt->checkIndex = 0;
12255 ctxt->inSubset = 0;
12256 ctxt->errNo = XML_ERR_OK;
12257 ctxt->depth = 0;
12258 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12259 ctxt->catalogs = NULL;
12260 xmlInitNodeInfoSeq(&ctxt->node_seq);
12261
12262 if (ctxt->attsDefault != NULL) {
12263 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12264 ctxt->attsDefault = NULL;
12265 }
12266 if (ctxt->attsSpecial != NULL) {
12267 xmlHashFree(ctxt->attsSpecial, NULL);
12268 ctxt->attsSpecial = NULL;
12269 }
12270
Daniel Veillard4432df22003-09-28 18:58:27 +000012271#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012272 if (ctxt->catalogs != NULL)
12273 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012274#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012275 if (ctxt->lastError.code != XML_ERR_OK)
12276 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012277}
12278
12279/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012280 * xmlCtxtResetPush:
12281 * @ctxt: an XML parser context
12282 * @chunk: a pointer to an array of chars
12283 * @size: number of chars in the array
12284 * @filename: an optional file name or URI
12285 * @encoding: the document encoding, or NULL
12286 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012287 * Reset a push parser context
12288 *
12289 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012290 */
12291int
12292xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12293 int size, const char *filename, const char *encoding)
12294{
12295 xmlParserInputPtr inputStream;
12296 xmlParserInputBufferPtr buf;
12297 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12298
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012299 if (ctxt == NULL)
12300 return(1);
12301
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012302 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12303 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12304
12305 buf = xmlAllocParserInputBuffer(enc);
12306 if (buf == NULL)
12307 return(1);
12308
12309 if (ctxt == NULL) {
12310 xmlFreeParserInputBuffer(buf);
12311 return(1);
12312 }
12313
12314 xmlCtxtReset(ctxt);
12315
12316 if (ctxt->pushTab == NULL) {
12317 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12318 sizeof(xmlChar *));
12319 if (ctxt->pushTab == NULL) {
12320 xmlErrMemory(ctxt, NULL);
12321 xmlFreeParserInputBuffer(buf);
12322 return(1);
12323 }
12324 }
12325
12326 if (filename == NULL) {
12327 ctxt->directory = NULL;
12328 } else {
12329 ctxt->directory = xmlParserGetDirectory(filename);
12330 }
12331
12332 inputStream = xmlNewInputStream(ctxt);
12333 if (inputStream == NULL) {
12334 xmlFreeParserInputBuffer(buf);
12335 return(1);
12336 }
12337
12338 if (filename == NULL)
12339 inputStream->filename = NULL;
12340 else
12341 inputStream->filename = (char *)
12342 xmlCanonicPath((const xmlChar *) filename);
12343 inputStream->buf = buf;
12344 inputStream->base = inputStream->buf->buffer->content;
12345 inputStream->cur = inputStream->buf->buffer->content;
12346 inputStream->end =
12347 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12348
12349 inputPush(ctxt, inputStream);
12350
12351 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12352 (ctxt->input->buf != NULL)) {
12353 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12354 int cur = ctxt->input->cur - ctxt->input->base;
12355
12356 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12357
12358 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12359 ctxt->input->cur = ctxt->input->base + cur;
12360 ctxt->input->end =
12361 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12362 use];
12363#ifdef DEBUG_PUSH
12364 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12365#endif
12366 }
12367
12368 if (encoding != NULL) {
12369 xmlCharEncodingHandlerPtr hdlr;
12370
12371 hdlr = xmlFindCharEncodingHandler(encoding);
12372 if (hdlr != NULL) {
12373 xmlSwitchToEncoding(ctxt, hdlr);
12374 } else {
12375 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12376 "Unsupported encoding %s\n", BAD_CAST encoding);
12377 }
12378 } else if (enc != XML_CHAR_ENCODING_NONE) {
12379 xmlSwitchEncoding(ctxt, enc);
12380 }
12381
12382 return(0);
12383}
12384
12385/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012386 * xmlCtxtUseOptions:
12387 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012388 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012389 *
12390 * Applies the options to the parser context
12391 *
12392 * Returns 0 in case of success, the set of unknown or unimplemented options
12393 * in case of error.
12394 */
12395int
12396xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12397{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012398 if (ctxt == NULL)
12399 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012400 if (options & XML_PARSE_RECOVER) {
12401 ctxt->recovery = 1;
12402 options -= XML_PARSE_RECOVER;
12403 } else
12404 ctxt->recovery = 0;
12405 if (options & XML_PARSE_DTDLOAD) {
12406 ctxt->loadsubset = XML_DETECT_IDS;
12407 options -= XML_PARSE_DTDLOAD;
12408 } else
12409 ctxt->loadsubset = 0;
12410 if (options & XML_PARSE_DTDATTR) {
12411 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12412 options -= XML_PARSE_DTDATTR;
12413 }
12414 if (options & XML_PARSE_NOENT) {
12415 ctxt->replaceEntities = 1;
12416 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12417 options -= XML_PARSE_NOENT;
12418 } else
12419 ctxt->replaceEntities = 0;
12420 if (options & XML_PARSE_NOWARNING) {
12421 ctxt->sax->warning = NULL;
12422 options -= XML_PARSE_NOWARNING;
12423 }
12424 if (options & XML_PARSE_NOERROR) {
12425 ctxt->sax->error = NULL;
12426 ctxt->sax->fatalError = NULL;
12427 options -= XML_PARSE_NOERROR;
12428 }
12429 if (options & XML_PARSE_PEDANTIC) {
12430 ctxt->pedantic = 1;
12431 options -= XML_PARSE_PEDANTIC;
12432 } else
12433 ctxt->pedantic = 0;
12434 if (options & XML_PARSE_NOBLANKS) {
12435 ctxt->keepBlanks = 0;
12436 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12437 options -= XML_PARSE_NOBLANKS;
12438 } else
12439 ctxt->keepBlanks = 1;
12440 if (options & XML_PARSE_DTDVALID) {
12441 ctxt->validate = 1;
12442 if (options & XML_PARSE_NOWARNING)
12443 ctxt->vctxt.warning = NULL;
12444 if (options & XML_PARSE_NOERROR)
12445 ctxt->vctxt.error = NULL;
12446 options -= XML_PARSE_DTDVALID;
12447 } else
12448 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012449#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012450 if (options & XML_PARSE_SAX1) {
12451 ctxt->sax->startElement = xmlSAX2StartElement;
12452 ctxt->sax->endElement = xmlSAX2EndElement;
12453 ctxt->sax->startElementNs = NULL;
12454 ctxt->sax->endElementNs = NULL;
12455 ctxt->sax->initialized = 1;
12456 options -= XML_PARSE_SAX1;
12457 }
Daniel Veillard81273902003-09-30 00:43:48 +000012458#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012459 if (options & XML_PARSE_NODICT) {
12460 ctxt->dictNames = 0;
12461 options -= XML_PARSE_NODICT;
12462 } else {
12463 ctxt->dictNames = 1;
12464 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012465 if (options & XML_PARSE_NOCDATA) {
12466 ctxt->sax->cdataBlock = NULL;
12467 options -= XML_PARSE_NOCDATA;
12468 }
12469 if (options & XML_PARSE_NSCLEAN) {
12470 ctxt->options |= XML_PARSE_NSCLEAN;
12471 options -= XML_PARSE_NSCLEAN;
12472 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012473 if (options & XML_PARSE_NONET) {
12474 ctxt->options |= XML_PARSE_NONET;
12475 options -= XML_PARSE_NONET;
12476 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012477 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012478 return (options);
12479}
12480
12481/**
12482 * xmlDoRead:
12483 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012484 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012485 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012486 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012487 * @reuse: keep the context for reuse
12488 *
12489 * Common front-end for the xmlRead functions
12490 *
12491 * Returns the resulting document tree or NULL
12492 */
12493static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012494xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12495 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012496{
12497 xmlDocPtr ret;
12498
12499 xmlCtxtUseOptions(ctxt, options);
12500 if (encoding != NULL) {
12501 xmlCharEncodingHandlerPtr hdlr;
12502
12503 hdlr = xmlFindCharEncodingHandler(encoding);
12504 if (hdlr != NULL)
12505 xmlSwitchToEncoding(ctxt, hdlr);
12506 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012507 if ((URL != NULL) && (ctxt->input != NULL) &&
12508 (ctxt->input->filename == NULL))
12509 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012510 xmlParseDocument(ctxt);
12511 if ((ctxt->wellFormed) || ctxt->recovery)
12512 ret = ctxt->myDoc;
12513 else {
12514 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012515 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012516 xmlFreeDoc(ctxt->myDoc);
12517 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012518 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012519 ctxt->myDoc = NULL;
12520 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012521 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012522 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012523
12524 return (ret);
12525}
12526
12527/**
12528 * xmlReadDoc:
12529 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012530 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012531 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012532 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012533 *
12534 * parse an XML in-memory document and build a tree.
12535 *
12536 * Returns the resulting document tree
12537 */
12538xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012539xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012540{
12541 xmlParserCtxtPtr ctxt;
12542
12543 if (cur == NULL)
12544 return (NULL);
12545
12546 ctxt = xmlCreateDocParserCtxt(cur);
12547 if (ctxt == NULL)
12548 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012549 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012550}
12551
12552/**
12553 * xmlReadFile:
12554 * @filename: a file or URL
12555 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012556 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012557 *
12558 * parse an XML file from the filesystem or the network.
12559 *
12560 * Returns the resulting document tree
12561 */
12562xmlDocPtr
12563xmlReadFile(const char *filename, const char *encoding, int options)
12564{
12565 xmlParserCtxtPtr ctxt;
12566
Daniel Veillard61b93382003-11-03 14:28:31 +000012567 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012568 if (ctxt == NULL)
12569 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012570 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012571}
12572
12573/**
12574 * xmlReadMemory:
12575 * @buffer: a pointer to a char array
12576 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012577 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012578 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012579 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012580 *
12581 * parse an XML in-memory document and build a tree.
12582 *
12583 * Returns the resulting document tree
12584 */
12585xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012586xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012587{
12588 xmlParserCtxtPtr ctxt;
12589
12590 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12591 if (ctxt == NULL)
12592 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012593 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012594}
12595
12596/**
12597 * xmlReadFd:
12598 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012599 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012600 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012601 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012602 *
12603 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012604 * NOTE that the file descriptor will not be closed when the
12605 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012606 *
12607 * Returns the resulting document tree
12608 */
12609xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012610xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012611{
12612 xmlParserCtxtPtr ctxt;
12613 xmlParserInputBufferPtr input;
12614 xmlParserInputPtr stream;
12615
12616 if (fd < 0)
12617 return (NULL);
12618
12619 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12620 if (input == NULL)
12621 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012622 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012623 ctxt = xmlNewParserCtxt();
12624 if (ctxt == NULL) {
12625 xmlFreeParserInputBuffer(input);
12626 return (NULL);
12627 }
12628 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12629 if (stream == NULL) {
12630 xmlFreeParserInputBuffer(input);
12631 xmlFreeParserCtxt(ctxt);
12632 return (NULL);
12633 }
12634 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012635 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012636}
12637
12638/**
12639 * xmlReadIO:
12640 * @ioread: an I/O read function
12641 * @ioclose: an I/O close function
12642 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012643 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012644 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012645 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012646 *
12647 * parse an XML document from I/O functions and source and build a tree.
12648 *
12649 * Returns the resulting document tree
12650 */
12651xmlDocPtr
12652xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012653 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012654{
12655 xmlParserCtxtPtr ctxt;
12656 xmlParserInputBufferPtr input;
12657 xmlParserInputPtr stream;
12658
12659 if (ioread == NULL)
12660 return (NULL);
12661
12662 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12663 XML_CHAR_ENCODING_NONE);
12664 if (input == NULL)
12665 return (NULL);
12666 ctxt = xmlNewParserCtxt();
12667 if (ctxt == NULL) {
12668 xmlFreeParserInputBuffer(input);
12669 return (NULL);
12670 }
12671 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12672 if (stream == NULL) {
12673 xmlFreeParserInputBuffer(input);
12674 xmlFreeParserCtxt(ctxt);
12675 return (NULL);
12676 }
12677 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012678 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012679}
12680
12681/**
12682 * xmlCtxtReadDoc:
12683 * @ctxt: an XML parser context
12684 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012685 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012686 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012687 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012688 *
12689 * parse an XML in-memory document and build a tree.
12690 * This reuses the existing @ctxt parser context
12691 *
12692 * Returns the resulting document tree
12693 */
12694xmlDocPtr
12695xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012696 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012697{
12698 xmlParserInputPtr stream;
12699
12700 if (cur == NULL)
12701 return (NULL);
12702 if (ctxt == NULL)
12703 return (NULL);
12704
12705 xmlCtxtReset(ctxt);
12706
12707 stream = xmlNewStringInputStream(ctxt, cur);
12708 if (stream == NULL) {
12709 return (NULL);
12710 }
12711 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012712 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012713}
12714
12715/**
12716 * xmlCtxtReadFile:
12717 * @ctxt: an XML parser context
12718 * @filename: a file or URL
12719 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012720 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012721 *
12722 * parse an XML file from the filesystem or the network.
12723 * This reuses the existing @ctxt parser context
12724 *
12725 * Returns the resulting document tree
12726 */
12727xmlDocPtr
12728xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12729 const char *encoding, int options)
12730{
12731 xmlParserInputPtr stream;
12732
12733 if (filename == NULL)
12734 return (NULL);
12735 if (ctxt == NULL)
12736 return (NULL);
12737
12738 xmlCtxtReset(ctxt);
12739
Daniel Veillard29614c72004-11-26 10:47:26 +000012740 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012741 if (stream == NULL) {
12742 return (NULL);
12743 }
12744 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012745 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012746}
12747
12748/**
12749 * xmlCtxtReadMemory:
12750 * @ctxt: an XML parser context
12751 * @buffer: a pointer to a char array
12752 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012753 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012754 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012755 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012756 *
12757 * parse an XML in-memory document and build a tree.
12758 * This reuses the existing @ctxt parser context
12759 *
12760 * Returns the resulting document tree
12761 */
12762xmlDocPtr
12763xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012764 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012765{
12766 xmlParserInputBufferPtr input;
12767 xmlParserInputPtr stream;
12768
12769 if (ctxt == NULL)
12770 return (NULL);
12771 if (buffer == NULL)
12772 return (NULL);
12773
12774 xmlCtxtReset(ctxt);
12775
12776 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12777 if (input == NULL) {
12778 return(NULL);
12779 }
12780
12781 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12782 if (stream == NULL) {
12783 xmlFreeParserInputBuffer(input);
12784 return(NULL);
12785 }
12786
12787 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012788 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012789}
12790
12791/**
12792 * xmlCtxtReadFd:
12793 * @ctxt: an XML parser context
12794 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012795 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012796 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012797 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012798 *
12799 * parse an XML from a file descriptor and build a tree.
12800 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012801 * NOTE that the file descriptor will not be closed when the
12802 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012803 *
12804 * Returns the resulting document tree
12805 */
12806xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012807xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12808 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012809{
12810 xmlParserInputBufferPtr input;
12811 xmlParserInputPtr stream;
12812
12813 if (fd < 0)
12814 return (NULL);
12815 if (ctxt == NULL)
12816 return (NULL);
12817
12818 xmlCtxtReset(ctxt);
12819
12820
12821 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12822 if (input == NULL)
12823 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012824 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012825 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12826 if (stream == NULL) {
12827 xmlFreeParserInputBuffer(input);
12828 return (NULL);
12829 }
12830 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012831 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012832}
12833
12834/**
12835 * xmlCtxtReadIO:
12836 * @ctxt: an XML parser context
12837 * @ioread: an I/O read function
12838 * @ioclose: an I/O close function
12839 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012840 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012841 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012842 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012843 *
12844 * parse an XML document from I/O functions and source and build a tree.
12845 * This reuses the existing @ctxt parser context
12846 *
12847 * Returns the resulting document tree
12848 */
12849xmlDocPtr
12850xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12851 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012852 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012853 const char *encoding, int options)
12854{
12855 xmlParserInputBufferPtr input;
12856 xmlParserInputPtr stream;
12857
12858 if (ioread == NULL)
12859 return (NULL);
12860 if (ctxt == NULL)
12861 return (NULL);
12862
12863 xmlCtxtReset(ctxt);
12864
12865 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12866 XML_CHAR_ENCODING_NONE);
12867 if (input == NULL)
12868 return (NULL);
12869 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12870 if (stream == NULL) {
12871 xmlFreeParserInputBuffer(input);
12872 return (NULL);
12873 }
12874 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012875 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012876}