blob: 8c8e6b9255b3bef3abda891e07d3d2e764a1bf59 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different character ranges are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 *
 * The initial value is 1024. The variable has external linkage and is
 * not const, so other translation units can read and modify it.
 */
unsigned int xmlParserMaxDepth = 1024;
Owen Taylor3473f882001-02-23 17:55:21 +000091
/*
 * NOTE(review): presumably a compile-time switch selecting the SAX2 code
 * paths; its uses are outside the visible part of the file -- confirm.
 */
#define SAX2 1

/*
 * Two buffer-size constants. NOTE(review): presumably the initial sizes of
 * temporary parsing buffers; the consumers are not visible here -- confirm.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Fixed marker string, cast to xmlChar via BAD_CAST.
 * NOTE(review): presumably used to tag documents built in SAX compatibility
 * mode; verify against the users of this macro.
 */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The array is terminated by a NULL entry so it can be walked without a
 * separate element count.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
/*
 * Forward declarations for routines that are used before their definition
 * later in this file.
 */

/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

/* File-private helper; reports its outcome as an xmlParserErrors code. */
static xmlParserErrors
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);

/* Only declared when the legacy API is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
static void
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
                      xmlNodePtr lastNode);
#endif /* LIBXML_LEGACY_ENABLED */

/* File-private helper; reports its outcome as an xmlParserErrors code. */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
              const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
129/************************************************************************
130 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000131 * Some factorized error routines *
132 * *
133 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000413 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000414 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000415 schannel = ctxt->sax->serror;
416 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000417 (ctxt->sax) ? ctxt->sax->warning : NULL,
418 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000419 ctxt, NULL, XML_FROM_PARSER, error,
420 XML_ERR_WARNING, NULL, 0,
421 (const char *) str1, (const char *) str2, NULL, 0, 0,
422 msg, (const char *) str1, (const char *) str2);
423}
424
425/**
426 * xmlValidityError:
427 * @ctxt: an XML parser context
428 * @error: the error number
429 * @msg: the error message
430 * @str1: extra data
431 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000432 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000433 */
434static void
435xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
436 const char *msg, const xmlChar *str1)
437{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000438 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000439
440 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
441 (ctxt->instate == XML_PARSER_EOF))
442 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000443 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000444 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000445 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000446 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000447 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000448 ctxt, NULL, XML_FROM_DTD, error,
449 XML_ERR_ERROR, NULL, 0, (const char *) str1,
450 NULL, NULL, 0, 0,
451 msg, (const char *) str1);
452 ctxt->valid = 0;
453}
454
455/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000456 * xmlFatalErrMsgInt:
457 * @ctxt: an XML parser context
458 * @error: the error number
459 * @msg: the error message
460 * @val: an integer value
461 *
462 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
463 */
464static void
465xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000467{
Daniel Veillard157fee02003-10-31 10:36:03 +0000468 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
469 (ctxt->instate == XML_PARSER_EOF))
470 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000471 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000472 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
474 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000475 ctxt->wellFormed = 0;
476 if (ctxt->recovery == 0)
477 ctxt->disableSAX = 1;
478}
479
480/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000481 * xmlFatalErrMsgStrIntStr:
482 * @ctxt: an XML parser context
483 * @error: the error number
484 * @msg: the error message
485 * @str1: an string info
486 * @val: an integer value
487 * @str2: an string info
488 *
489 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
490 */
491static void
492xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493 const char *msg, const xmlChar *str1, int val,
494 const xmlChar *str2)
495{
Daniel Veillard157fee02003-10-31 10:36:03 +0000496 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
497 (ctxt->instate == XML_PARSER_EOF))
498 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000499 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000500 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000501 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
502 NULL, 0, (const char *) str1, (const char *) str2,
503 NULL, val, 0, msg, str1, val, str2);
504 ctxt->wellFormed = 0;
505 if (ctxt->recovery == 0)
506 ctxt->disableSAX = 1;
507}
508
509/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000510 * xmlFatalErrMsgStr:
511 * @ctxt: an XML parser context
512 * @error: the error number
513 * @msg: the error message
514 * @val: a string value
515 *
516 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
517 */
518static void
519xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000520 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000521{
Daniel Veillard157fee02003-10-31 10:36:03 +0000522 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
523 (ctxt->instate == XML_PARSER_EOF))
524 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000525 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000526 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 XML_FROM_PARSER, error, XML_ERR_FATAL,
528 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
529 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000530 ctxt->wellFormed = 0;
531 if (ctxt->recovery == 0)
532 ctxt->disableSAX = 1;
533}
534
535/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000536 * xmlErrMsgStr:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @val: a string value
541 *
542 * Handle a non fatal parser error
543 */
544static void
545xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
546 const char *msg, const xmlChar * val)
547{
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000551 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000553 XML_FROM_PARSER, error, XML_ERR_ERROR,
554 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
555 val);
556}
557
558/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000559 * xmlNsErr:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the message
563 * @info1: extra information string
564 * @info2: extra information string
565 *
566 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
567 */
568static void
569xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
570 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000571 const xmlChar * info1, const xmlChar * info2,
572 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000573{
Daniel Veillard157fee02003-10-31 10:36:03 +0000574 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
575 (ctxt->instate == XML_PARSER_EOF))
576 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000578 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000579 XML_ERR_ERROR, NULL, 0, (const char *) info1,
580 (const char *) info2, (const char *) info3, 0, 0, msg,
581 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582 ctxt->nsWellFormed = 0;
583}
584
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000585/************************************************************************
586 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000587 * SAX2 defaulted attributes handling *
588 * *
589 ************************************************************************/
590
591/**
592 * xmlDetectSAX2:
593 * @ctxt: an XML parser context
594 *
595 * Do the SAX2 detection and specific intialization
596 */
597static void
598xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
599 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000600#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000601 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
602 ((ctxt->sax->startElementNs != NULL) ||
603 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000604#else
605 ctxt->sax2 = 1;
606#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000607
608 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
609 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
610 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000611 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
612 (ctxt->str_xml_ns == NULL)) {
613 xmlErrMemory(ctxt, NULL);
614 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000615}
616
Daniel Veillarde57ec792003-09-10 10:50:59 +0000617typedef struct _xmlDefAttrs xmlDefAttrs;
618typedef xmlDefAttrs *xmlDefAttrsPtr;
619struct _xmlDefAttrs {
620 int nbAttrs; /* number of defaulted attributes on that element */
621 int maxAttrs; /* the size of the array */
622 const xmlChar *values[4]; /* array of localname/prefix/values */
623};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000624
625/**
626 * xmlAddDefAttrs:
627 * @ctxt: an XML parser context
628 * @fullname: the element fullname
629 * @fullattr: the attribute fullname
630 * @value: the attribute value
631 *
632 * Add a defaulted attribute for an element
633 */
634static void
635xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
636 const xmlChar *fullname,
637 const xmlChar *fullattr,
638 const xmlChar *value) {
639 xmlDefAttrsPtr defaults;
640 int len;
641 const xmlChar *name;
642 const xmlChar *prefix;
643
644 if (ctxt->attsDefault == NULL) {
645 ctxt->attsDefault = xmlHashCreate(10);
646 if (ctxt->attsDefault == NULL)
647 goto mem_error;
648 }
649
650 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000651 * split the element name into prefix:localname , the string found
652 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000653 */
654 name = xmlSplitQName3(fullname, &len);
655 if (name == NULL) {
656 name = xmlDictLookup(ctxt->dict, fullname, -1);
657 prefix = NULL;
658 } else {
659 name = xmlDictLookup(ctxt->dict, name, -1);
660 prefix = xmlDictLookup(ctxt->dict, fullname, len);
661 }
662
663 /*
664 * make sure there is some storage
665 */
666 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
667 if (defaults == NULL) {
668 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000669 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000670 if (defaults == NULL)
671 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000672 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000673 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000674 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
675 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000676 xmlDefAttrsPtr temp;
677
678 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000679 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000680 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000681 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000682 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000683 defaults->maxAttrs *= 2;
684 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
685 }
686
687 /*
688 * plit the element name into prefix:localname , the string found
689 * are within the DTD and hen not associated to namespace names.
690 */
691 name = xmlSplitQName3(fullattr, &len);
692 if (name == NULL) {
693 name = xmlDictLookup(ctxt->dict, fullattr, -1);
694 prefix = NULL;
695 } else {
696 name = xmlDictLookup(ctxt->dict, name, -1);
697 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
698 }
699
700 defaults->values[4 * defaults->nbAttrs] = name;
701 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
702 /* intern the string and precompute the end */
703 len = xmlStrlen(value);
704 value = xmlDictLookup(ctxt->dict, value, len);
705 defaults->values[4 * defaults->nbAttrs + 2] = value;
706 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
707 defaults->nbAttrs++;
708
709 return;
710
711mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000712 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000713 return;
714}
715
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000716/**
717 * xmlAddSpecialAttr:
718 * @ctxt: an XML parser context
719 * @fullname: the element fullname
720 * @fullattr: the attribute fullname
721 * @type: the attribute type
722 *
723 * Register that this attribute is not CDATA
724 */
725static void
726xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
727 const xmlChar *fullname,
728 const xmlChar *fullattr,
729 int type)
730{
731 if (ctxt->attsSpecial == NULL) {
732 ctxt->attsSpecial = xmlHashCreate(10);
733 if (ctxt->attsSpecial == NULL)
734 goto mem_error;
735 }
736
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000737 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
738 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000739 return;
740
741mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000742 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000743 return;
744}
745
Daniel Veillard4432df22003-09-28 18:58:27 +0000746/**
747 * xmlCheckLanguageID:
748 * @lang: pointer to the string value
749 *
750 * Checks that the value conforms to the LanguageID production:
751 *
752 * NOTE: this is somewhat deprecated, those productions were removed from
753 * the XML Second edition.
754 *
755 * [33] LanguageID ::= Langcode ('-' Subcode)*
756 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
757 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
758 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
759 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
760 * [38] Subcode ::= ([a-z] | [A-Z])+
761 *
762 * Returns 1 if correct 0 otherwise
763 **/
764int
765xmlCheckLanguageID(const xmlChar * lang)
766{
767 const xmlChar *cur = lang;
768
769 if (cur == NULL)
770 return (0);
771 if (((cur[0] == 'i') && (cur[1] == '-')) ||
772 ((cur[0] == 'I') && (cur[1] == '-'))) {
773 /*
774 * IANA code
775 */
776 cur += 2;
777 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
778 ((cur[0] >= 'a') && (cur[0] <= 'z')))
779 cur++;
780 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
781 ((cur[0] == 'X') && (cur[1] == '-'))) {
782 /*
783 * User code
784 */
785 cur += 2;
786 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
787 ((cur[0] >= 'a') && (cur[0] <= 'z')))
788 cur++;
789 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
790 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
791 /*
792 * ISO639
793 */
794 cur++;
795 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
796 ((cur[0] >= 'a') && (cur[0] <= 'z')))
797 cur++;
798 else
799 return (0);
800 } else
801 return (0);
802 while (cur[0] != 0) { /* non input consuming */
803 if (cur[0] != '-')
804 return (0);
805 cur++;
806 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
807 ((cur[0] >= 'a') && (cur[0] <= 'z')))
808 cur++;
809 else
810 return (0);
811 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
812 ((cur[0] >= 'a') && (cur[0] <= 'z')))
813 cur++;
814 }
815 return (1);
816}
817
Owen Taylor3473f882001-02-23 17:55:21 +0000818/************************************************************************
819 * *
820 * Parser stacks related functions and macros *
821 * *
822 ************************************************************************/
823
824xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
825 const xmlChar ** str);
826
Daniel Veillard0fb18932003-09-07 09:14:37 +0000827#ifdef SAX2
828/**
829 * nsPush:
830 * @ctxt: an XML parser context
831 * @prefix: the namespace prefix or NULL
832 * @URL: the namespace name
833 *
834 * Pushes a new parser namespace on top of the ns stack
835 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000836 * Returns -1 in case of error, -2 if the namespace should be discarded
837 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000838 */
839static int
840nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
841{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000842 if (ctxt->options & XML_PARSE_NSCLEAN) {
843 int i;
844 for (i = 0;i < ctxt->nsNr;i += 2) {
845 if (ctxt->nsTab[i] == prefix) {
846 /* in scope */
847 if (ctxt->nsTab[i + 1] == URL)
848 return(-2);
849 /* out of scope keep it */
850 break;
851 }
852 }
853 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000854 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
855 ctxt->nsMax = 10;
856 ctxt->nsNr = 0;
857 ctxt->nsTab = (const xmlChar **)
858 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
859 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000860 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000861 ctxt->nsMax = 0;
862 return (-1);
863 }
864 } else if (ctxt->nsNr >= ctxt->nsMax) {
865 ctxt->nsMax *= 2;
866 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000867 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000868 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
869 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000870 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000871 ctxt->nsMax /= 2;
872 return (-1);
873 }
874 }
875 ctxt->nsTab[ctxt->nsNr++] = prefix;
876 ctxt->nsTab[ctxt->nsNr++] = URL;
877 return (ctxt->nsNr);
878}
879/**
880 * nsPop:
881 * @ctxt: an XML parser context
882 * @nr: the number to pop
883 *
884 * Pops the top @nr parser prefix/namespace from the ns stack
885 *
886 * Returns the number of namespaces removed
887 */
888static int
889nsPop(xmlParserCtxtPtr ctxt, int nr)
890{
891 int i;
892
893 if (ctxt->nsTab == NULL) return(0);
894 if (ctxt->nsNr < nr) {
895 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
896 nr = ctxt->nsNr;
897 }
898 if (ctxt->nsNr <= 0)
899 return (0);
900
901 for (i = 0;i < nr;i++) {
902 ctxt->nsNr--;
903 ctxt->nsTab[ctxt->nsNr] = NULL;
904 }
905 return(nr);
906}
907#endif
908
909static int
910xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
911 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000912 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000913 int maxatts;
914
915 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000916 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000917 atts = (const xmlChar **)
918 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000919 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000920 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000921 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
922 if (attallocs == NULL) goto mem_error;
923 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000924 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000925 } else if (nr + 5 > ctxt->maxatts) {
926 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000927 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
928 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000929 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000930 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000931 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
932 (maxatts / 5) * sizeof(int));
933 if (attallocs == NULL) goto mem_error;
934 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000935 ctxt->maxatts = maxatts;
936 }
937 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000938mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000939 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000940 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000941}
942
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000943/**
944 * inputPush:
945 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000946 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000947 *
948 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000949 *
950 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000951 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000952int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000953inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
954{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000955 if ((ctxt == NULL) || (value == NULL))
956 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000957 if (ctxt->inputNr >= ctxt->inputMax) {
958 ctxt->inputMax *= 2;
959 ctxt->inputTab =
960 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
961 ctxt->inputMax *
962 sizeof(ctxt->inputTab[0]));
963 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000964 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000965 return (0);
966 }
967 }
968 ctxt->inputTab[ctxt->inputNr] = value;
969 ctxt->input = value;
970 return (ctxt->inputNr++);
971}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000972/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000973 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000974 * @ctxt: an XML parser context
975 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000976 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000977 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000978 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000979 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000980xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000981inputPop(xmlParserCtxtPtr ctxt)
982{
983 xmlParserInputPtr ret;
984
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000985 if (ctxt == NULL)
986 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000987 if (ctxt->inputNr <= 0)
988 return (0);
989 ctxt->inputNr--;
990 if (ctxt->inputNr > 0)
991 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
992 else
993 ctxt->input = NULL;
994 ret = ctxt->inputTab[ctxt->inputNr];
995 ctxt->inputTab[ctxt->inputNr] = 0;
996 return (ret);
997}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000998/**
999 * nodePush:
1000 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001001 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001002 *
1003 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001004 *
1005 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001006 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001007int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001008nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1009{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001010 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001011 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001012 xmlNodePtr *tmp;
1013
1014 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1015 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001016 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001017 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001018 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001019 return (0);
1020 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001021 ctxt->nodeTab = tmp;
1022 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001023 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001024 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001025 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001026 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1027 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001029 return(0);
1030 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001031 ctxt->nodeTab[ctxt->nodeNr] = value;
1032 ctxt->node = value;
1033 return (ctxt->nodeNr++);
1034}
1035/**
1036 * nodePop:
1037 * @ctxt: an XML parser context
1038 *
1039 * Pops the top element node from the node stack
1040 *
1041 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001042 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001043xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001044nodePop(xmlParserCtxtPtr ctxt)
1045{
1046 xmlNodePtr ret;
1047
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001048 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001049 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001050 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001051 ctxt->nodeNr--;
1052 if (ctxt->nodeNr > 0)
1053 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1054 else
1055 ctxt->node = NULL;
1056 ret = ctxt->nodeTab[ctxt->nodeNr];
1057 ctxt->nodeTab[ctxt->nodeNr] = 0;
1058 return (ret);
1059}
Daniel Veillarda2351322004-06-27 12:08:10 +00001060
1061#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001062/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001063 * nameNsPush:
1064 * @ctxt: an XML parser context
1065 * @value: the element name
1066 * @prefix: the element prefix
1067 * @URI: the element namespace name
1068 *
1069 * Pushes a new element name/prefix/URL on top of the name stack
1070 *
1071 * Returns -1 in case of error, the index in the stack otherwise
1072 */
1073static int
1074nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1075 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1076{
1077 if (ctxt->nameNr >= ctxt->nameMax) {
1078 const xmlChar * *tmp;
1079 void **tmp2;
1080 ctxt->nameMax *= 2;
1081 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1082 ctxt->nameMax *
1083 sizeof(ctxt->nameTab[0]));
1084 if (tmp == NULL) {
1085 ctxt->nameMax /= 2;
1086 goto mem_error;
1087 }
1088 ctxt->nameTab = tmp;
1089 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1090 ctxt->nameMax * 3 *
1091 sizeof(ctxt->pushTab[0]));
1092 if (tmp2 == NULL) {
1093 ctxt->nameMax /= 2;
1094 goto mem_error;
1095 }
1096 ctxt->pushTab = tmp2;
1097 }
1098 ctxt->nameTab[ctxt->nameNr] = value;
1099 ctxt->name = value;
1100 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1101 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001102 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 return (ctxt->nameNr++);
1104mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001105 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001106 return (-1);
1107}
1108/**
1109 * nameNsPop:
1110 * @ctxt: an XML parser context
1111 *
1112 * Pops the top element/prefix/URI name from the name stack
1113 *
1114 * Returns the name just removed
1115 */
1116static const xmlChar *
1117nameNsPop(xmlParserCtxtPtr ctxt)
1118{
1119 const xmlChar *ret;
1120
1121 if (ctxt->nameNr <= 0)
1122 return (0);
1123 ctxt->nameNr--;
1124 if (ctxt->nameNr > 0)
1125 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1126 else
1127 ctxt->name = NULL;
1128 ret = ctxt->nameTab[ctxt->nameNr];
1129 ctxt->nameTab[ctxt->nameNr] = NULL;
1130 return (ret);
1131}
Daniel Veillarda2351322004-06-27 12:08:10 +00001132#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001133
1134/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001135 * namePush:
1136 * @ctxt: an XML parser context
1137 * @value: the element name
1138 *
1139 * Pushes a new element name on top of the name stack
1140 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001142 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001143int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001144namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001145{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001146 if (ctxt == NULL) return (-1);
1147
Daniel Veillard1c732d22002-11-30 11:22:59 +00001148 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001149 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001150 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001151 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001152 ctxt->nameMax *
1153 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 if (tmp == NULL) {
1155 ctxt->nameMax /= 2;
1156 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001157 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001159 }
1160 ctxt->nameTab[ctxt->nameNr] = value;
1161 ctxt->name = value;
1162 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001163mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001164 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001165 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001166}
1167/**
1168 * namePop:
1169 * @ctxt: an XML parser context
1170 *
1171 * Pops the top element name from the name stack
1172 *
1173 * Returns the name just removed
1174 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001175const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001176namePop(xmlParserCtxtPtr ctxt)
1177{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001178 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001179
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001180 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1181 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001182 ctxt->nameNr--;
1183 if (ctxt->nameNr > 0)
1184 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1185 else
1186 ctxt->name = NULL;
1187 ret = ctxt->nameTab[ctxt->nameNr];
1188 ctxt->nameTab[ctxt->nameNr] = 0;
1189 return (ret);
1190}
Owen Taylor3473f882001-02-23 17:55:21 +00001191
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001192static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001193 if (ctxt->spaceNr >= ctxt->spaceMax) {
1194 ctxt->spaceMax *= 2;
1195 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1196 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1197 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001198 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001199 return(0);
1200 }
1201 }
1202 ctxt->spaceTab[ctxt->spaceNr] = val;
1203 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1204 return(ctxt->spaceNr++);
1205}
1206
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001207static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001208 int ret;
1209 if (ctxt->spaceNr <= 0) return(0);
1210 ctxt->spaceNr--;
1211 if (ctxt->spaceNr > 0)
1212 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1213 else
1214 ctxt->space = NULL;
1215 ret = ctxt->spaceTab[ctxt->spaceNr];
1216 ctxt->spaceTab[ctxt->spaceNr] = -1;
1217 return(ret);
1218}
1219
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1254
/* Current byte of the active input; RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* The read pointer of the active input itself. */
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 ... cn): true when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn in that order.  Each macro extends the
 * previous one by a single byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && \
    ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && \
    ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance the read pointer, the column and ctxt->nbChars by
 * val, without any newline accounting.  Afterwards a '%' at the new
 * position triggers xmlParserHandlePEReference(), and an exhausted input
 * that cannot be grown is popped.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one byte at a time so that line
 * and column numbers stay correct across newlines.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < val; skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; \
            ctxt->input->col = 1; \
        } else { \
            ctxt->input->col++; \
        } \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)

/*
 * SHRINK: when ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes
 * lie behind the read pointer and fewer than 2 * INPUT_CHUNK lie ahead,
 * call xmlSHRINK().  The expansion is a bare `if` statement which already
 * ends with a semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) && \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt);
1305
1306static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1307 xmlParserInputShrink(ctxt->input);
1308 if ((*ctxt->input->cur == 0) &&
1309 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1310 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001311 }
Owen Taylor3473f882001-02-23 17:55:21 +00001312
/*
 * GROW: when ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes
 * remain between the read pointer and the end of the input buffer, call
 * xmlGROW().  The expansion is a bare `if` statement which already ends
 * with a semicolon.
 */
#define GROW if ((ctxt->progressive == 0) && \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
        xmlGROW (ctxt);
1316
1317static void xmlGROW (xmlParserCtxtPtr ctxt) {
1318 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1319 if ((*ctxt->input->cur == 0) &&
1320 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1321 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001322}
Owen Taylor3473f882001-02-23 17:55:21 +00001323
1324#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1325
1326#define NEXT xmlNextChar(ctxt)
1327
Daniel Veillard21a0f912001-02-25 19:54:14 +00001328#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001329 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001330 ctxt->input->cur++; \
1331 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001332 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001333 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1334 }
1335
Owen Taylor3473f882001-02-23 17:55:21 +00001336#define NEXTL(l) do { \
1337 if (*(ctxt->input->cur) == '\n') { \
1338 ctxt->input->line++; ctxt->input->col = 1; \
1339 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001340 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001341 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001342 } while (0)
1343
1344#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1345#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1346
1347#define COPY_BUF(l,b,i,v) \
1348 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001349 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001350
1351/**
1352 * xmlSkipBlankChars:
1353 * @ctxt: the XML parser context
1354 *
1355 * skip all blanks character found at that point in the input streams.
1356 * It pops up finished entities in the process if allowable at that point.
1357 *
1358 * Returns the number of space chars skipped
1359 */
1360
1361int
1362xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001363 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001364
1365 /*
1366 * It's Okay to use CUR/NEXT here since all the blanks are on
1367 * the ASCII range.
1368 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001369 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1370 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001371 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001372 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001373 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001374 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001375 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001376 if (*cur == '\n') {
1377 ctxt->input->line++; ctxt->input->col = 1;
1378 }
1379 cur++;
1380 res++;
1381 if (*cur == 0) {
1382 ctxt->input->cur = cur;
1383 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1384 cur = ctxt->input->cur;
1385 }
1386 }
1387 ctxt->input->cur = cur;
1388 } else {
1389 int cur;
1390 do {
1391 cur = CUR;
1392 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
1393 NEXT;
1394 cur = CUR;
1395 res++;
1396 }
1397 while ((cur == 0) && (ctxt->inputNr > 1) &&
1398 (ctxt->instate != XML_PARSER_COMMENT)) {
1399 xmlPopInput(ctxt);
1400 cur = CUR;
1401 }
1402 /*
1403 * Need to handle support of entities branching here
1404 */
1405 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1406 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1407 }
Owen Taylor3473f882001-02-23 17:55:21 +00001408 return(res);
1409}
1410
1411/************************************************************************
1412 * *
1413 * Commodity functions to handle entities *
1414 * *
1415 ************************************************************************/
1416
1417/**
1418 * xmlPopInput:
1419 * @ctxt: an XML parser context
1420 *
1421 * xmlPopInput: the current input pointed by ctxt->input came to an end
1422 * pop it and return the next char.
1423 *
1424 * Returns the current xmlChar in the parser context
1425 */
1426xmlChar
1427xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001428 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001429 if (xmlParserDebugEntities)
1430 xmlGenericError(xmlGenericErrorContext,
1431 "Popping input %d\n", ctxt->inputNr);
1432 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001433 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001434 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1435 return(xmlPopInput(ctxt));
1436 return(CUR);
1437}
1438
1439/**
1440 * xmlPushInput:
1441 * @ctxt: an XML parser context
1442 * @input: an XML parser input fragment (entity, XML fragment ...).
1443 *
1444 * xmlPushInput: switch to a new input stream which is stacked on top
1445 * of the previous one(s).
1446 */
1447void
1448xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1449 if (input == NULL) return;
1450
1451 if (xmlParserDebugEntities) {
1452 if ((ctxt->input != NULL) && (ctxt->input->filename))
1453 xmlGenericError(xmlGenericErrorContext,
1454 "%s(%d): ", ctxt->input->filename,
1455 ctxt->input->line);
1456 xmlGenericError(xmlGenericErrorContext,
1457 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1458 }
1459 inputPush(ctxt, input);
1460 GROW;
1461}
1462
1463/**
1464 * xmlParseCharRef:
1465 * @ctxt: an XML parser context
1466 *
1467 * parse Reference declarations
1468 *
1469 * [66] CharRef ::= '&#' [0-9]+ ';' |
1470 * '&#x' [0-9a-fA-F]+ ';'
1471 *
1472 * [ WFC: Legal Character ]
1473 * Characters referred to using character references must match the
1474 * production for Char.
1475 *
1476 * Returns the value parsed (as an int), 0 in case of error
1477 */
1478int
1479xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001480 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001481 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001482 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001483
Owen Taylor3473f882001-02-23 17:55:21 +00001484 /*
1485 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1486 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001487 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001488 (NXT(2) == 'x')) {
1489 SKIP(3);
1490 GROW;
1491 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001492 if (count++ > 20) {
1493 count = 0;
1494 GROW;
1495 }
1496 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001497 val = val * 16 + (CUR - '0');
1498 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1499 val = val * 16 + (CUR - 'a') + 10;
1500 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1501 val = val * 16 + (CUR - 'A') + 10;
1502 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001503 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001504 val = 0;
1505 break;
1506 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001507 if (val > 0x10FFFF)
1508 outofrange = val;
1509
Owen Taylor3473f882001-02-23 17:55:21 +00001510 NEXT;
1511 count++;
1512 }
1513 if (RAW == ';') {
1514 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001515 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001516 ctxt->nbChars ++;
1517 ctxt->input->cur++;
1518 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001519 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001520 SKIP(2);
1521 GROW;
1522 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001523 if (count++ > 20) {
1524 count = 0;
1525 GROW;
1526 }
1527 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001528 val = val * 10 + (CUR - '0');
1529 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001530 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001531 val = 0;
1532 break;
1533 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001534 if (val > 0x10FFFF)
1535 outofrange = val;
1536
Owen Taylor3473f882001-02-23 17:55:21 +00001537 NEXT;
1538 count++;
1539 }
1540 if (RAW == ';') {
1541 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001542 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001543 ctxt->nbChars ++;
1544 ctxt->input->cur++;
1545 }
1546 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001547 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001548 }
1549
1550 /*
1551 * [ WFC: Legal Character ]
1552 * Characters referred to using character references must match the
1553 * production for Char.
1554 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001555 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001556 return(val);
1557 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001558 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1559 "xmlParseCharRef: invalid xmlChar value %d\n",
1560 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001561 }
1562 return(0);
1563}
1564
1565/**
1566 * xmlParseStringCharRef:
1567 * @ctxt: an XML parser context
1568 * @str: a pointer to an index in the string
1569 *
1570 * parse Reference declarations, variant parsing from a string rather
1571 * than an an input flow.
1572 *
1573 * [66] CharRef ::= '&#' [0-9]+ ';' |
1574 * '&#x' [0-9a-fA-F]+ ';'
1575 *
1576 * [ WFC: Legal Character ]
1577 * Characters referred to using character references must match the
1578 * production for Char.
1579 *
1580 * Returns the value parsed (as an int), 0 in case of error, str will be
1581 * updated to the current value of the index
1582 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001583static int
Owen Taylor3473f882001-02-23 17:55:21 +00001584xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1585 const xmlChar *ptr;
1586 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001587 unsigned int val = 0;
1588 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001589
1590 if ((str == NULL) || (*str == NULL)) return(0);
1591 ptr = *str;
1592 cur = *ptr;
1593 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1594 ptr += 3;
1595 cur = *ptr;
1596 while (cur != ';') { /* Non input consuming loop */
1597 if ((cur >= '0') && (cur <= '9'))
1598 val = val * 16 + (cur - '0');
1599 else if ((cur >= 'a') && (cur <= 'f'))
1600 val = val * 16 + (cur - 'a') + 10;
1601 else if ((cur >= 'A') && (cur <= 'F'))
1602 val = val * 16 + (cur - 'A') + 10;
1603 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001604 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001605 val = 0;
1606 break;
1607 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001608 if (val > 0x10FFFF)
1609 outofrange = val;
1610
Owen Taylor3473f882001-02-23 17:55:21 +00001611 ptr++;
1612 cur = *ptr;
1613 }
1614 if (cur == ';')
1615 ptr++;
1616 } else if ((cur == '&') && (ptr[1] == '#')){
1617 ptr += 2;
1618 cur = *ptr;
1619 while (cur != ';') { /* Non input consuming loops */
1620 if ((cur >= '0') && (cur <= '9'))
1621 val = val * 10 + (cur - '0');
1622 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001623 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001624 val = 0;
1625 break;
1626 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001627 if (val > 0x10FFFF)
1628 outofrange = val;
1629
Owen Taylor3473f882001-02-23 17:55:21 +00001630 ptr++;
1631 cur = *ptr;
1632 }
1633 if (cur == ';')
1634 ptr++;
1635 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001636 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001637 return(0);
1638 }
1639 *str = ptr;
1640
1641 /*
1642 * [ WFC: Legal Character ]
1643 * Characters referred to using character references must match the
1644 * production for Char.
1645 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001646 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001647 return(val);
1648 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001649 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1650 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1651 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001652 }
1653 return(0);
1654}
1655
1656/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001657 * xmlNewBlanksWrapperInputStream:
1658 * @ctxt: an XML parser context
1659 * @entity: an Entity pointer
1660 *
1661 * Create a new input stream for wrapping
1662 * blanks around a PEReference
1663 *
1664 * Returns the new input stream or NULL
1665 */
1666
1667static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1668
Daniel Veillardf4862f02002-09-10 11:13:43 +00001669static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001670xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1671 xmlParserInputPtr input;
1672 xmlChar *buffer;
1673 size_t length;
1674 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001675 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1676 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001677 return(NULL);
1678 }
1679 if (xmlParserDebugEntities)
1680 xmlGenericError(xmlGenericErrorContext,
1681 "new blanks wrapper for entity: %s\n", entity->name);
1682 input = xmlNewInputStream(ctxt);
1683 if (input == NULL) {
1684 return(NULL);
1685 }
1686 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001687 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001688 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001689 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001690 return(NULL);
1691 }
1692 buffer [0] = ' ';
1693 buffer [1] = '%';
1694 buffer [length-3] = ';';
1695 buffer [length-2] = ' ';
1696 buffer [length-1] = 0;
1697 memcpy(buffer + 2, entity->name, length - 5);
1698 input->free = deallocblankswrapper;
1699 input->base = buffer;
1700 input->cur = buffer;
1701 input->length = length;
1702 input->end = &buffer[length];
1703 return(input);
1704}
1705
1706/**
Owen Taylor3473f882001-02-23 17:55:21 +00001707 * xmlParserHandlePEReference:
1708 * @ctxt: the parser context
1709 *
1710 * [69] PEReference ::= '%' Name ';'
1711 *
1712 * [ WFC: No Recursion ]
1713 * A parsed entity must not contain a recursive
1714 * reference to itself, either directly or indirectly.
1715 *
1716 * [ WFC: Entity Declared ]
1717 * In a document without any DTD, a document with only an internal DTD
1718 * subset which contains no parameter entity references, or a document
1719 * with "standalone='yes'", ... ... The declaration of a parameter
1720 * entity must precede any reference to it...
1721 *
1722 * [ VC: Entity Declared ]
1723 * In a document with an external subset or external parameter entities
1724 * with "standalone='no'", ... ... The declaration of a parameter entity
1725 * must precede any reference to it...
1726 *
1727 * [ WFC: In DTD ]
1728 * Parameter-entity references may only appear in the DTD.
1729 * NOTE: misleading but this is handled.
1730 *
1731 * A PEReference may have been detected in the current input stream
1732 * the handling is done accordingly to
1733 * http://www.w3.org/TR/REC-xml#entproc
1734 * i.e.
1735 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001736 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001737 */
1738void
1739xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001740 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001741 xmlEntityPtr entity = NULL;
1742 xmlParserInputPtr input;
1743
Owen Taylor3473f882001-02-23 17:55:21 +00001744 if (RAW != '%') return;
1745 switch(ctxt->instate) {
1746 case XML_PARSER_CDATA_SECTION:
1747 return;
1748 case XML_PARSER_COMMENT:
1749 return;
1750 case XML_PARSER_START_TAG:
1751 return;
1752 case XML_PARSER_END_TAG:
1753 return;
1754 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001755 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001756 return;
1757 case XML_PARSER_PROLOG:
1758 case XML_PARSER_START:
1759 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001760 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001761 return;
1762 case XML_PARSER_ENTITY_DECL:
1763 case XML_PARSER_CONTENT:
1764 case XML_PARSER_ATTRIBUTE_VALUE:
1765 case XML_PARSER_PI:
1766 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001767 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001768 /* we just ignore it there */
1769 return;
1770 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001771 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001772 return;
1773 case XML_PARSER_ENTITY_VALUE:
1774 /*
1775 * NOTE: in the case of entity values, we don't do the
1776 * substitution here since we need the literal
1777 * entity value to be able to save the internal
1778 * subset of the document.
1779 * This will be handled by xmlStringDecodeEntities
1780 */
1781 return;
1782 case XML_PARSER_DTD:
1783 /*
1784 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1785 * In the internal DTD subset, parameter-entity references
1786 * can occur only where markup declarations can occur, not
1787 * within markup declarations.
1788 * In that case this is handled in xmlParseMarkupDecl
1789 */
1790 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1791 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001792 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001793 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001794 break;
1795 case XML_PARSER_IGNORE:
1796 return;
1797 }
1798
1799 NEXT;
1800 name = xmlParseName(ctxt);
1801 if (xmlParserDebugEntities)
1802 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001803 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001804 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001805 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001806 } else {
1807 if (RAW == ';') {
1808 NEXT;
1809 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1810 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1811 if (entity == NULL) {
1812
1813 /*
1814 * [ WFC: Entity Declared ]
1815 * In a document without any DTD, a document with only an
1816 * internal DTD subset which contains no parameter entity
1817 * references, or a document with "standalone='yes'", ...
1818 * ... The declaration of a parameter entity must precede
1819 * any reference to it...
1820 */
1821 if ((ctxt->standalone == 1) ||
1822 ((ctxt->hasExternalSubset == 0) &&
1823 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001824 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001825 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001826 } else {
1827 /*
1828 * [ VC: Entity Declared ]
1829 * In a document with an external subset or external
1830 * parameter entities with "standalone='no'", ...
1831 * ... The declaration of a parameter entity must precede
1832 * any reference to it...
1833 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001834 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1835 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1836 "PEReference: %%%s; not found\n",
1837 name);
1838 } else
1839 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1840 "PEReference: %%%s; not found\n",
1841 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001842 ctxt->valid = 0;
1843 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001844 } else if (ctxt->input->free != deallocblankswrapper) {
1845 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1846 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001847 } else {
1848 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1849 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001850 xmlChar start[4];
1851 xmlCharEncoding enc;
1852
Owen Taylor3473f882001-02-23 17:55:21 +00001853 /*
1854 * handle the extra spaces added before and after
1855 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001856 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001857 */
1858 input = xmlNewEntityInputStream(ctxt, entity);
1859 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001860
1861 /*
1862 * Get the 4 first bytes and decode the charset
1863 * if enc != XML_CHAR_ENCODING_NONE
1864 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001865 * Note that, since we may have some non-UTF8
1866 * encoding (like UTF16, bug 135229), the 'length'
1867 * is not known, but we can calculate based upon
1868 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001869 */
1870 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001871 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001872 start[0] = RAW;
1873 start[1] = NXT(1);
1874 start[2] = NXT(2);
1875 start[3] = NXT(3);
1876 enc = xmlDetectCharEncoding(start, 4);
1877 if (enc != XML_CHAR_ENCODING_NONE) {
1878 xmlSwitchEncoding(ctxt, enc);
1879 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001880 }
1881
Owen Taylor3473f882001-02-23 17:55:21 +00001882 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001883 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1884 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001885 xmlParseTextDecl(ctxt);
1886 }
Owen Taylor3473f882001-02-23 17:55:21 +00001887 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001888 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1889 "PEReference: %s is not a parameter entity\n",
1890 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001891 }
1892 }
1893 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001894 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
Owen Taylor3473f882001-02-23 17:55:21 +00001896 }
1897}
1898
/*
 * growBuffer:
 * @buffer:  name of an xmlChar * variable; a companion integer variable
 *           named <buffer>_size must be visible too
 *
 * Double <buffer>_size and reallocate @buffer to the new size. The size
 * variable is doubled before the reallocation is attempted. When the
 * reallocation fails @buffer keeps its previous value and control is
 * transferred to a "mem_error" label which the calling function must
 * provide.
 */
#define growBuffer(buffer) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    tmp = (xmlChar *) xmlRealloc(buffer,				\
                                 buffer##_size * sizeof(xmlChar));	\
    if (tmp != NULL)							\
        buffer = tmp;							\
    else								\
        goto mem_error;							\
}
1910
1911/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001912 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001913 * @ctxt: the parser context
1914 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001916 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1917 * @end: an end marker xmlChar, 0 if none
1918 * @end2: an end marker xmlChar, 0 if none
1919 * @end3: an end marker xmlChar, 0 if none
1920 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001921 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001922 *
1923 * [67] Reference ::= EntityRef | CharRef
1924 *
1925 * [69] PEReference ::= '%' Name ';'
1926 *
1927 * Returns A newly allocated string with the substitution done. The caller
1928 * must deallocate it !
1929 */
1930xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1932 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001933 xmlChar *buffer = NULL;
1934 int buffer_size = 0;
1935
1936 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001937 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001938 xmlEntityPtr ent;
1939 int c,l;
1940 int nbchars = 0;
1941
Daniel Veillarda82b1822004-11-08 16:24:57 +00001942 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001943 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001944 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001945
1946 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001947 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001948 return(NULL);
1949 }
1950
1951 /*
1952 * allocate a translation buffer.
1953 */
1954 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001955 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001956 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001957
1958 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001959 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001960 * we are operating on already parsed values.
1961 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001962 if (str < last)
1963 c = CUR_SCHAR(str, l);
1964 else
1965 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001966 while ((c != 0) && (c != end) && /* non input consuming loop */
1967 (c != end2) && (c != end3)) {
1968
1969 if (c == 0) break;
1970 if ((c == '&') && (str[1] == '#')) {
1971 int val = xmlParseStringCharRef(ctxt, &str);
1972 if (val != 0) {
1973 COPY_BUF(0,buffer,nbchars,val);
1974 }
1975 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1976 if (xmlParserDebugEntities)
1977 xmlGenericError(xmlGenericErrorContext,
1978 "String decoding Entity Reference: %.30s\n",
1979 str);
1980 ent = xmlParseStringEntityRef(ctxt, &str);
1981 if ((ent != NULL) &&
1982 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1983 if (ent->content != NULL) {
1984 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1985 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001986 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1987 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001988 }
1989 } else if ((ent != NULL) && (ent->content != NULL)) {
1990 xmlChar *rep;
1991
1992 ctxt->depth++;
1993 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1994 0, 0, 0);
1995 ctxt->depth--;
1996 if (rep != NULL) {
1997 current = rep;
1998 while (*current != 0) { /* non input consuming loop */
1999 buffer[nbchars++] = *current++;
2000 if (nbchars >
2001 buffer_size - XML_PARSER_BUFFER_SIZE) {
2002 growBuffer(buffer);
2003 }
2004 }
2005 xmlFree(rep);
2006 }
2007 } else if (ent != NULL) {
2008 int i = xmlStrlen(ent->name);
2009 const xmlChar *cur = ent->name;
2010
2011 buffer[nbchars++] = '&';
2012 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2013 growBuffer(buffer);
2014 }
2015 for (;i > 0;i--)
2016 buffer[nbchars++] = *cur++;
2017 buffer[nbchars++] = ';';
2018 }
2019 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2020 if (xmlParserDebugEntities)
2021 xmlGenericError(xmlGenericErrorContext,
2022 "String decoding PE Reference: %.30s\n", str);
2023 ent = xmlParseStringPEReference(ctxt, &str);
2024 if (ent != NULL) {
2025 xmlChar *rep;
2026
2027 ctxt->depth++;
2028 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2029 0, 0, 0);
2030 ctxt->depth--;
2031 if (rep != NULL) {
2032 current = rep;
2033 while (*current != 0) { /* non input consuming loop */
2034 buffer[nbchars++] = *current++;
2035 if (nbchars >
2036 buffer_size - XML_PARSER_BUFFER_SIZE) {
2037 growBuffer(buffer);
2038 }
2039 }
2040 xmlFree(rep);
2041 }
2042 }
2043 } else {
2044 COPY_BUF(l,buffer,nbchars,c);
2045 str += l;
2046 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2047 growBuffer(buffer);
2048 }
2049 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002050 if (str < last)
2051 c = CUR_SCHAR(str, l);
2052 else
2053 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002054 }
2055 buffer[nbchars++] = 0;
2056 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002057
2058mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002059 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002060 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002061}
2062
Daniel Veillarde57ec792003-09-10 10:50:59 +00002063/**
2064 * xmlStringDecodeEntities:
2065 * @ctxt: the parser context
2066 * @str: the input string
2067 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2068 * @end: an end marker xmlChar, 0 if none
2069 * @end2: an end marker xmlChar, 0 if none
2070 * @end3: an end marker xmlChar, 0 if none
2071 *
2072 * Takes a entity string content and process to do the adequate substitutions.
2073 *
2074 * [67] Reference ::= EntityRef | CharRef
2075 *
2076 * [69] PEReference ::= '%' Name ';'
2077 *
2078 * Returns A newly allocated string with the substitution done. The caller
2079 * must deallocate it !
2080 */
2081xmlChar *
2082xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2083 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002084 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002085 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2086 end, end2, end3));
2087}
Owen Taylor3473f882001-02-23 17:55:21 +00002088
2089/************************************************************************
2090 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002091 * Commodity functions, cleanup needed ? *
2092 * *
2093 ************************************************************************/
2094
2095/**
2096 * areBlanks:
2097 * @ctxt: an XML parser context
2098 * @str: a xmlChar *
2099 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002100 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002101 *
2102 * Is this a sequence of blank chars that one can ignore ?
2103 *
2104 * Returns 1 if ignorable 0 otherwise.
2105 */
2106
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002107static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2108 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002109 int i, ret;
2110 xmlNodePtr lastChild;
2111
Daniel Veillard05c13a22001-09-09 08:38:09 +00002112 /*
2113 * Don't spend time trying to differentiate them, the same callback is
2114 * used !
2115 */
2116 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002117 return(0);
2118
Owen Taylor3473f882001-02-23 17:55:21 +00002119 /*
2120 * Check for xml:space value.
2121 */
2122 if (*(ctxt->space) == 1)
2123 return(0);
2124
2125 /*
2126 * Check that the string is made of blanks
2127 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002128 if (blank_chars == 0) {
2129 for (i = 0;i < len;i++)
2130 if (!(IS_BLANK_CH(str[i]))) return(0);
2131 }
Owen Taylor3473f882001-02-23 17:55:21 +00002132
2133 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002134 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002135 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002136 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002137 if (ctxt->myDoc != NULL) {
2138 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2139 if (ret == 0) return(1);
2140 if (ret == 1) return(0);
2141 }
2142
2143 /*
2144 * Otherwise, heuristic :-\
2145 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002146 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002147 if ((ctxt->node->children == NULL) &&
2148 (RAW == '<') && (NXT(1) == '/')) return(0);
2149
2150 lastChild = xmlGetLastChild(ctxt->node);
2151 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002152 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2153 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002154 } else if (xmlNodeIsText(lastChild))
2155 return(0);
2156 else if ((ctxt->node->children != NULL) &&
2157 (xmlNodeIsText(ctxt->node->children)))
2158 return(0);
2159 return(1);
2160}
2161
Owen Taylor3473f882001-02-23 17:55:21 +00002162/************************************************************************
2163 * *
2164 * Extra stuff for namespace support *
2165 * Relates to http://www.w3.org/TR/WD-xml-names *
2166 * *
2167 ************************************************************************/
2168
2169/**
2170 * xmlSplitQName:
2171 * @ctxt: an XML parser context
 * @name:  the qualified name string to split
2173 * @prefix: a xmlChar **
2174 *
2175 * parse an UTF8 encoded XML qualified name string
2176 *
2177 * [NS 5] QName ::= (Prefix ':')? LocalPart
2178 *
2179 * [NS 6] Prefix ::= NCName
2180 *
2181 * [NS 7] LocalPart ::= NCName
2182 *
2183 * Returns the local part, and prefix is updated
2184 * to get the Prefix if any.
2185 */
2186
2187xmlChar *
2188xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2189 xmlChar buf[XML_MAX_NAMELEN + 5];
2190 xmlChar *buffer = NULL;
2191 int len = 0;
2192 int max = XML_MAX_NAMELEN;
2193 xmlChar *ret = NULL;
2194 const xmlChar *cur = name;
2195 int c;
2196
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002197 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002198 *prefix = NULL;
2199
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002200 if (cur == NULL) return(NULL);
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202#ifndef XML_XML_NAMESPACE
2203 /* xml: prefix is not really a namespace */
2204 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2205 (cur[2] == 'l') && (cur[3] == ':'))
2206 return(xmlStrdup(name));
2207#endif
2208
Daniel Veillard597bc482003-07-24 16:08:28 +00002209 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002210 if (cur[0] == ':')
2211 return(xmlStrdup(name));
2212
2213 c = *cur++;
2214 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2215 buf[len++] = c;
2216 c = *cur++;
2217 }
2218 if (len >= max) {
2219 /*
2220 * Okay someone managed to make a huge name, so he's ready to pay
2221 * for the processing speed.
2222 */
2223 max = len * 2;
2224
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002225 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002226 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002227 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002228 return(NULL);
2229 }
2230 memcpy(buffer, buf, len);
2231 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2232 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002233 xmlChar *tmp;
2234
Owen Taylor3473f882001-02-23 17:55:21 +00002235 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002236 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002237 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002238 if (tmp == NULL) {
2239 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002240 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002241 return(NULL);
2242 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002243 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002244 }
2245 buffer[len++] = c;
2246 c = *cur++;
2247 }
2248 buffer[len] = 0;
2249 }
2250
Daniel Veillard597bc482003-07-24 16:08:28 +00002251 /* nasty but well=formed
2252 if ((c == ':') && (*cur == 0)) {
2253 return(xmlStrdup(name));
2254 } */
2255
Owen Taylor3473f882001-02-23 17:55:21 +00002256 if (buffer == NULL)
2257 ret = xmlStrndup(buf, len);
2258 else {
2259 ret = buffer;
2260 buffer = NULL;
2261 max = XML_MAX_NAMELEN;
2262 }
2263
2264
2265 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002266 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002268 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002269 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 len = 0;
2272
Daniel Veillardbb284f42002-10-16 18:02:47 +00002273 /*
2274 * Check that the first character is proper to start
2275 * a new name
2276 */
2277 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2278 ((c >= 0x41) && (c <= 0x5A)) ||
2279 (c == '_') || (c == ':'))) {
2280 int l;
2281 int first = CUR_SCHAR(cur, l);
2282
2283 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002284 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002285 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002286 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002287 }
2288 }
2289 cur++;
2290
Owen Taylor3473f882001-02-23 17:55:21 +00002291 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2292 buf[len++] = c;
2293 c = *cur++;
2294 }
2295 if (len >= max) {
2296 /*
2297 * Okay someone managed to make a huge name, so he's ready to pay
2298 * for the processing speed.
2299 */
2300 max = len * 2;
2301
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002302 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002303 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002304 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002305 return(NULL);
2306 }
2307 memcpy(buffer, buf, len);
2308 while (c != 0) { /* tested bigname2.xml */
2309 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002310 xmlChar *tmp;
2311
Owen Taylor3473f882001-02-23 17:55:21 +00002312 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002313 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002314 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002315 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002316 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002317 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002318 return(NULL);
2319 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002320 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002321 }
2322 buffer[len++] = c;
2323 c = *cur++;
2324 }
2325 buffer[len] = 0;
2326 }
2327
2328 if (buffer == NULL)
2329 ret = xmlStrndup(buf, len);
2330 else {
2331 ret = buffer;
2332 }
2333 }
2334
2335 return(ret);
2336}
2337
2338/************************************************************************
2339 * *
2340 * The parser itself *
2341 * Relates to http://www.w3.org/TR/REC-xml *
2342 * *
2343 ************************************************************************/
2344
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002345static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002346static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002347 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002348
Owen Taylor3473f882001-02-23 17:55:21 +00002349/**
2350 * xmlParseName:
2351 * @ctxt: an XML parser context
2352 *
2353 * parse an XML name.
2354 *
2355 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2356 * CombiningChar | Extender
2357 *
2358 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2359 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002360 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002361 *
2362 * Returns the Name parsed or NULL
2363 */
2364
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002365const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002366xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002367 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002368 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002369 int count = 0;
2370
2371 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002372
2373 /*
2374 * Accelerator for simple ASCII names
2375 */
2376 in = ctxt->input->cur;
2377 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2378 ((*in >= 0x41) && (*in <= 0x5A)) ||
2379 (*in == '_') || (*in == ':')) {
2380 in++;
2381 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2382 ((*in >= 0x41) && (*in <= 0x5A)) ||
2383 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002384 (*in == '_') || (*in == '-') ||
2385 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002386 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002387 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002388 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002390 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002391 ctxt->nbChars += count;
2392 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002393 if (ret == NULL)
2394 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002395 return(ret);
2396 }
2397 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002398 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002399}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002400
Daniel Veillard46de64e2002-05-29 08:21:33 +00002401/**
2402 * xmlParseNameAndCompare:
2403 * @ctxt: an XML parser context
2404 *
2405 * parse an XML name and compares for match
2406 * (specialized for endtag parsing)
2407 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002408 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2409 * and the name for mismatch
2410 */
2411
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002412static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002413xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002414 register const xmlChar *cmp = other;
2415 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002416 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002417
2418 GROW;
2419
2420 in = ctxt->input->cur;
2421 while (*in != 0 && *in == *cmp) {
2422 ++in;
2423 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002424 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002425 }
William M. Brack76e95df2003-10-18 16:20:14 +00002426 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002427 /* success */
2428 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002429 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002430 }
2431 /* failure (or end of input buffer), check with full function */
2432 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002433 /* strings coming from the dictionnary direct compare possible */
2434 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002435 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002436 }
2437 return ret;
2438}
2439
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002440static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002442 int len = 0, l;
2443 int c;
2444 int count = 0;
2445
2446 /*
2447 * Handler for more complex cases
2448 */
2449 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002450 c = CUR_CHAR(l);
2451 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2452 (!IS_LETTER(c) && (c != '_') &&
2453 (c != ':'))) {
2454 return(NULL);
2455 }
2456
2457 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002458 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002459 (c == '.') || (c == '-') ||
2460 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002461 (IS_COMBINING(c)) ||
2462 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002463 if (count++ > 100) {
2464 count = 0;
2465 GROW;
2466 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002467 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002468 NEXTL(l);
2469 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002470 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002471 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002472}
2473
2474/**
2475 * xmlParseStringName:
2476 * @ctxt: an XML parser context
2477 * @str: a pointer to the string pointer (IN/OUT)
2478 *
2479 * parse an XML name.
2480 *
2481 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2482 * CombiningChar | Extender
2483 *
2484 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2485 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002486 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002487 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002488 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002489 * is updated to the current location in the string.
2490 */
2491
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002492static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002493xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2494 xmlChar buf[XML_MAX_NAMELEN + 5];
2495 const xmlChar *cur = *str;
2496 int len = 0, l;
2497 int c;
2498
2499 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002500 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002501 (c != ':')) {
2502 return(NULL);
2503 }
2504
William M. Brack871611b2003-10-18 04:53:14 +00002505 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002506 (c == '.') || (c == '-') ||
2507 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002508 (IS_COMBINING(c)) ||
2509 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002510 COPY_BUF(l,buf,len,c);
2511 cur += l;
2512 c = CUR_SCHAR(cur, l);
2513 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2514 /*
2515 * Okay someone managed to make a huge name, so he's ready to pay
2516 * for the processing speed.
2517 */
2518 xmlChar *buffer;
2519 int max = len * 2;
2520
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002521 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002522 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002523 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002524 return(NULL);
2525 }
2526 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002527 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002528 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002529 (c == '.') || (c == '-') ||
2530 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002531 (IS_COMBINING(c)) ||
2532 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002533 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002534 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002536 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002537 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002538 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002539 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002540 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002541 return(NULL);
2542 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002543 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002544 }
2545 COPY_BUF(l,buffer,len,c);
2546 cur += l;
2547 c = CUR_SCHAR(cur, l);
2548 }
2549 buffer[len] = 0;
2550 *str = cur;
2551 return(buffer);
2552 }
2553 }
2554 *str = cur;
2555 return(xmlStrndup(buf, len));
2556}
2557
2558/**
2559 * xmlParseNmtoken:
2560 * @ctxt: an XML parser context
2561 *
2562 * parse an XML Nmtoken.
2563 *
2564 * [7] Nmtoken ::= (NameChar)+
2565 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002566 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002567 *
2568 * Returns the Nmtoken parsed or NULL
2569 */
2570
2571xmlChar *
2572xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2573 xmlChar buf[XML_MAX_NAMELEN + 5];
2574 int len = 0, l;
2575 int c;
2576 int count = 0;
2577
2578 GROW;
2579 c = CUR_CHAR(l);
2580
William M. Brack871611b2003-10-18 04:53:14 +00002581 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002582 (c == '.') || (c == '-') ||
2583 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002584 (IS_COMBINING(c)) ||
2585 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002586 if (count++ > 100) {
2587 count = 0;
2588 GROW;
2589 }
2590 COPY_BUF(l,buf,len,c);
2591 NEXTL(l);
2592 c = CUR_CHAR(l);
2593 if (len >= XML_MAX_NAMELEN) {
2594 /*
2595 * Okay someone managed to make a huge token, so he's ready to pay
2596 * for the processing speed.
2597 */
2598 xmlChar *buffer;
2599 int max = len * 2;
2600
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002601 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002602 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002603 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002604 return(NULL);
2605 }
2606 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002607 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002608 (c == '.') || (c == '-') ||
2609 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002610 (IS_COMBINING(c)) ||
2611 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002612 if (count++ > 100) {
2613 count = 0;
2614 GROW;
2615 }
2616 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002617 xmlChar *tmp;
2618
Owen Taylor3473f882001-02-23 17:55:21 +00002619 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002620 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002621 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002622 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002624 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002625 return(NULL);
2626 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002627 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 }
2629 COPY_BUF(l,buffer,len,c);
2630 NEXTL(l);
2631 c = CUR_CHAR(l);
2632 }
2633 buffer[len] = 0;
2634 return(buffer);
2635 }
2636 }
2637 if (len == 0)
2638 return(NULL);
2639 return(xmlStrndup(buf, len));
2640}
2641
2642/**
2643 * xmlParseEntityValue:
2644 * @ctxt: an XML parser context
2645 * @orig: if non-NULL store a copy of the original entity value
2646 *
2647 * parse a value for ENTITY declarations
2648 *
2649 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2650 * "'" ([^%&'] | PEReference | Reference)* "'"
2651 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002652 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002653 */
2654
2655xmlChar *
2656xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2657 xmlChar *buf = NULL;
2658 int len = 0;
2659 int size = XML_PARSER_BUFFER_SIZE;
2660 int c, l;
2661 xmlChar stop;
2662 xmlChar *ret = NULL;
2663 const xmlChar *cur = NULL;
2664 xmlParserInputPtr input;
2665
2666 if (RAW == '"') stop = '"';
2667 else if (RAW == '\'') stop = '\'';
2668 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002669 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002670 return(NULL);
2671 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002672 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002673 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002674 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002675 return(NULL);
2676 }
2677
2678 /*
2679 * The content of the entity definition is copied in a buffer.
2680 */
2681
2682 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2683 input = ctxt->input;
2684 GROW;
2685 NEXT;
2686 c = CUR_CHAR(l);
2687 /*
2688 * NOTE: 4.4.5 Included in Literal
2689 * When a parameter entity reference appears in a literal entity
2690 * value, ... a single or double quote character in the replacement
2691 * text is always treated as a normal data character and will not
2692 * terminate the literal.
2693 * In practice it means we stop the loop only when back at parsing
2694 * the initial entity and the quote is found
2695 */
William M. Brack871611b2003-10-18 04:53:14 +00002696 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002697 (ctxt->input != input))) {
2698 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002699 xmlChar *tmp;
2700
Owen Taylor3473f882001-02-23 17:55:21 +00002701 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002702 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2703 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002704 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002705 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002706 return(NULL);
2707 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002708 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002709 }
2710 COPY_BUF(l,buf,len,c);
2711 NEXTL(l);
2712 /*
2713 * Pop-up of finished entities.
2714 */
2715 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2716 xmlPopInput(ctxt);
2717
2718 GROW;
2719 c = CUR_CHAR(l);
2720 if (c == 0) {
2721 GROW;
2722 c = CUR_CHAR(l);
2723 }
2724 }
2725 buf[len] = 0;
2726
2727 /*
2728 * Raise problem w.r.t. '&' and '%' being used in non-entities
2729 * reference constructs. Note Charref will be handled in
2730 * xmlStringDecodeEntities()
2731 */
2732 cur = buf;
2733 while (*cur != 0) { /* non input consuming */
2734 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2735 xmlChar *name;
2736 xmlChar tmp = *cur;
2737
2738 cur++;
2739 name = xmlParseStringName(ctxt, &cur);
2740 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002741 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002742 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002743 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002744 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002745 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2746 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002747 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002748 }
2749 if (name != NULL)
2750 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002751 if (*cur == 0)
2752 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002753 }
2754 cur++;
2755 }
2756
2757 /*
2758 * Then PEReference entities are substituted.
2759 */
2760 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002761 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002762 xmlFree(buf);
2763 } else {
2764 NEXT;
2765 /*
2766 * NOTE: 4.4.7 Bypassed
2767 * When a general entity reference appears in the EntityValue in
2768 * an entity declaration, it is bypassed and left as is.
2769 * so XML_SUBSTITUTE_REF is not set here.
2770 */
2771 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2772 0, 0, 0);
2773 if (orig != NULL)
2774 *orig = buf;
2775 else
2776 xmlFree(buf);
2777 }
2778
2779 return(ret);
2780}
2781
2782/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002783 * xmlParseAttValueComplex:
2784 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002785 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002786 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002787 *
2788 * parse a value for an attribute, this is the fallback function
2789 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002790 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002791 *
2792 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2793 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002794static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002795xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002796 xmlChar limit = 0;
2797 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002798 int len = 0;
2799 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002800 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002801 xmlChar *current = NULL;
2802 xmlEntityPtr ent;
2803
Owen Taylor3473f882001-02-23 17:55:21 +00002804 if (NXT(0) == '"') {
2805 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2806 limit = '"';
2807 NEXT;
2808 } else if (NXT(0) == '\'') {
2809 limit = '\'';
2810 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2811 NEXT;
2812 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002813 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002814 return(NULL);
2815 }
2816
2817 /*
2818 * allocate a translation buffer.
2819 */
2820 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002821 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002822 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002823
2824 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002825 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002826 */
2827 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002828 while ((NXT(0) != limit) && /* checked */
2829 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002830 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002831 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002832 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002833 if (NXT(1) == '#') {
2834 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002835
Owen Taylor3473f882001-02-23 17:55:21 +00002836 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002837 if (ctxt->replaceEntities) {
2838 if (len > buf_size - 10) {
2839 growBuffer(buf);
2840 }
2841 buf[len++] = '&';
2842 } else {
2843 /*
2844 * The reparsing will be done in xmlStringGetNodeList()
2845 * called by the attribute() function in SAX.c
2846 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002847 if (len > buf_size - 10) {
2848 growBuffer(buf);
2849 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002850 buf[len++] = '&';
2851 buf[len++] = '#';
2852 buf[len++] = '3';
2853 buf[len++] = '8';
2854 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002855 }
2856 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002857 if (len > buf_size - 10) {
2858 growBuffer(buf);
2859 }
Owen Taylor3473f882001-02-23 17:55:21 +00002860 len += xmlCopyChar(0, &buf[len], val);
2861 }
2862 } else {
2863 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002864 if ((ent != NULL) &&
2865 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2866 if (len > buf_size - 10) {
2867 growBuffer(buf);
2868 }
2869 if ((ctxt->replaceEntities == 0) &&
2870 (ent->content[0] == '&')) {
2871 buf[len++] = '&';
2872 buf[len++] = '#';
2873 buf[len++] = '3';
2874 buf[len++] = '8';
2875 buf[len++] = ';';
2876 } else {
2877 buf[len++] = ent->content[0];
2878 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002879 } else if ((ent != NULL) &&
2880 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002881 xmlChar *rep;
2882
2883 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2884 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002885 XML_SUBSTITUTE_REF,
2886 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002887 if (rep != NULL) {
2888 current = rep;
2889 while (*current != 0) { /* non input consuming */
2890 buf[len++] = *current++;
2891 if (len > buf_size - 10) {
2892 growBuffer(buf);
2893 }
2894 }
2895 xmlFree(rep);
2896 }
2897 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002898 if (len > buf_size - 10) {
2899 growBuffer(buf);
2900 }
Owen Taylor3473f882001-02-23 17:55:21 +00002901 if (ent->content != NULL)
2902 buf[len++] = ent->content[0];
2903 }
2904 } else if (ent != NULL) {
2905 int i = xmlStrlen(ent->name);
2906 const xmlChar *cur = ent->name;
2907
2908 /*
2909 * This may look absurd but is needed to detect
2910 * entities problems
2911 */
2912 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2913 (ent->content != NULL)) {
2914 xmlChar *rep;
2915 rep = xmlStringDecodeEntities(ctxt, ent->content,
2916 XML_SUBSTITUTE_REF, 0, 0, 0);
2917 if (rep != NULL)
2918 xmlFree(rep);
2919 }
2920
2921 /*
2922 * Just output the reference
2923 */
2924 buf[len++] = '&';
2925 if (len > buf_size - i - 10) {
2926 growBuffer(buf);
2927 }
2928 for (;i > 0;i--)
2929 buf[len++] = *cur++;
2930 buf[len++] = ';';
2931 }
2932 }
2933 } else {
2934 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002935 if ((len != 0) || (!normalize)) {
2936 if ((!normalize) || (!in_space)) {
2937 COPY_BUF(l,buf,len,0x20);
2938 if (len > buf_size - 10) {
2939 growBuffer(buf);
2940 }
2941 }
2942 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002943 }
2944 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002945 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002946 COPY_BUF(l,buf,len,c);
2947 if (len > buf_size - 10) {
2948 growBuffer(buf);
2949 }
2950 }
2951 NEXTL(l);
2952 }
2953 GROW;
2954 c = CUR_CHAR(l);
2955 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002956 if ((in_space) && (normalize)) {
2957 while (buf[len - 1] == 0x20) len--;
2958 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002959 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002960 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002961 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002962 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002963 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2964 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002965 } else
2966 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002967 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002968 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002969
2970mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002971 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002972 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002973}
2974
2975/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002976 * xmlParseAttValue:
2977 * @ctxt: an XML parser context
2978 *
2979 * parse a value for an attribute
2980 * Note: the parser won't do substitution of entities here, this
2981 * will be handled later in xmlStringGetNodeList
2982 *
2983 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2984 * "'" ([^<&'] | Reference)* "'"
2985 *
2986 * 3.3.3 Attribute-Value Normalization:
2987 * Before the value of an attribute is passed to the application or
2988 * checked for validity, the XML processor must normalize it as follows:
2989 * - a character reference is processed by appending the referenced
2990 * character to the attribute value
2991 * - an entity reference is processed by recursively processing the
2992 * replacement text of the entity
2993 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2994 * appending #x20 to the normalized value, except that only a single
2995 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2996 * parsed entity or the literal entity value of an internal parsed entity
2997 * - other characters are processed by appending them to the normalized value
2998 * If the declared value is not CDATA, then the XML processor must further
2999 * process the normalized attribute value by discarding any leading and
3000 * trailing space (#x20) characters, and by replacing sequences of space
3001 * (#x20) characters by a single space (#x20) character.
3002 * All attributes for which no declaration has been read should be treated
3003 * by a non-validating parser as if declared CDATA.
3004 *
3005 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3006 */
3007
3008
3009xmlChar *
3010xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003011 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013}
3014
3015/**
Owen Taylor3473f882001-02-23 17:55:21 +00003016 * xmlParseSystemLiteral:
3017 * @ctxt: an XML parser context
3018 *
3019 * parse an XML Literal
3020 *
3021 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3022 *
3023 * Returns the SystemLiteral parsed or NULL
3024 */
3025
3026xmlChar *
3027xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3028 xmlChar *buf = NULL;
3029 int len = 0;
3030 int size = XML_PARSER_BUFFER_SIZE;
3031 int cur, l;
3032 xmlChar stop;
3033 int state = ctxt->instate;
3034 int count = 0;
3035
3036 SHRINK;
3037 if (RAW == '"') {
3038 NEXT;
3039 stop = '"';
3040 } else if (RAW == '\'') {
3041 NEXT;
3042 stop = '\'';
3043 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003045 return(NULL);
3046 }
3047
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003048 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003049 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003050 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003051 return(NULL);
3052 }
3053 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3054 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003055 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003056 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003057 xmlChar *tmp;
3058
Owen Taylor3473f882001-02-23 17:55:21 +00003059 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003060 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3061 if (tmp == NULL) {
3062 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003063 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003064 ctxt->instate = (xmlParserInputState) state;
3065 return(NULL);
3066 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003067 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003068 }
3069 count++;
3070 if (count > 50) {
3071 GROW;
3072 count = 0;
3073 }
3074 COPY_BUF(l,buf,len,cur);
3075 NEXTL(l);
3076 cur = CUR_CHAR(l);
3077 if (cur == 0) {
3078 GROW;
3079 SHRINK;
3080 cur = CUR_CHAR(l);
3081 }
3082 }
3083 buf[len] = 0;
3084 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003085 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003086 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003087 } else {
3088 NEXT;
3089 }
3090 return(buf);
3091}
3092
3093/**
3094 * xmlParsePubidLiteral:
3095 * @ctxt: an XML parser context
3096 *
3097 * parse an XML public literal
3098 *
3099 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3100 *
3101 * Returns the PubidLiteral parsed or NULL.
3102 */
3103
3104xmlChar *
3105xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3106 xmlChar *buf = NULL;
3107 int len = 0;
3108 int size = XML_PARSER_BUFFER_SIZE;
3109 xmlChar cur;
3110 xmlChar stop;
3111 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003112 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003113
3114 SHRINK;
3115 if (RAW == '"') {
3116 NEXT;
3117 stop = '"';
3118 } else if (RAW == '\'') {
3119 NEXT;
3120 stop = '\'';
3121 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003122 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003123 return(NULL);
3124 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003125 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003126 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003127 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003128 return(NULL);
3129 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003130 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003131 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003132 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003134 xmlChar *tmp;
3135
Owen Taylor3473f882001-02-23 17:55:21 +00003136 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003137 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3138 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003139 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003140 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003141 return(NULL);
3142 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003143 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003144 }
3145 buf[len++] = cur;
3146 count++;
3147 if (count > 50) {
3148 GROW;
3149 count = 0;
3150 }
3151 NEXT;
3152 cur = CUR;
3153 if (cur == 0) {
3154 GROW;
3155 SHRINK;
3156 cur = CUR;
3157 }
3158 }
3159 buf[len] = 0;
3160 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003161 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003162 } else {
3163 NEXT;
3164 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003165 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003166 return(buf);
3167}
3168
Daniel Veillard48b2f892001-02-25 16:11:03 +00003169void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003170/**
3171 * xmlParseCharData:
3172 * @ctxt: an XML parser context
3173 * @cdata: int indicating whether we are within a CDATA section
3174 *
3175 * parse a CharData section.
3176 * if we are within a CDATA section ']]>' marks an end of section.
3177 *
3178 * The right angle bracket (>) may be represented using the string "&gt;",
3179 * and must, for compatibility, be escaped using "&gt;" or a character
3180 * reference when it appears in the string "]]>" in content, when that
3181 * string is not marking the end of a CDATA section.
3182 *
3183 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3184 */
3185
3186void
3187xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003188 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003189 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003190 int line = ctxt->input->line;
3191 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003192
3193 SHRINK;
3194 GROW;
3195 /*
3196 * Accelerated common case where input don't need to be
3197 * modified before passing it to the handler.
3198 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003199 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003200 in = ctxt->input->cur;
3201 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003202get_more_space:
3203 while (*in == 0x20) in++;
3204 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003205 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003206 in++;
3207 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003208 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003209 in++;
3210 }
3211 goto get_more_space;
3212 }
3213 if (*in == '<') {
3214 nbchar = in - ctxt->input->cur;
3215 if (nbchar > 0) {
3216 const xmlChar *tmp = ctxt->input->cur;
3217 ctxt->input->cur = in;
3218
Daniel Veillard34099b42004-11-04 17:34:35 +00003219 if ((ctxt->sax != NULL) &&
3220 (ctxt->sax->ignorableWhitespace !=
3221 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003222 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3223 ctxt->sax->ignorableWhitespace(ctxt->userData,
3224 tmp, nbchar);
3225 } else if (ctxt->sax->characters != NULL)
3226 ctxt->sax->characters(ctxt->userData,
3227 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003228 } else if ((ctxt->sax != NULL) &&
3229 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003230 ctxt->sax->characters(ctxt->userData,
3231 tmp, nbchar);
3232 }
3233 }
3234 return;
3235 }
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003236get_more:
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003237 while (((*in > ']') && (*in <= 0x7F)) ||
3238 ((*in > '&') && (*in < '<')) ||
3239 ((*in > '<') && (*in < ']')) ||
3240 ((*in >= 0x20) && (*in < '&')) ||
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003241 (*in == 0x09)) {
3242 in++;
3243 ctxt->input->col++;
3244 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003245 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003246 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003247 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003248 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003249 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003250 in++;
3251 }
3252 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003253 }
3254 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003255 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003256 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003257 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003258 return;
3259 }
3260 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003261 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003262 goto get_more;
3263 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003264 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003265 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003266 if ((ctxt->sax != NULL) &&
3267 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003268 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003269 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003270 const xmlChar *tmp = ctxt->input->cur;
3271 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003272
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003273 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003274 ctxt->sax->ignorableWhitespace(ctxt->userData,
3275 tmp, nbchar);
3276 } else if (ctxt->sax->characters != NULL)
3277 ctxt->sax->characters(ctxt->userData,
3278 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003279 line = ctxt->input->line;
3280 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003281 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003282 if (ctxt->sax->characters != NULL)
3283 ctxt->sax->characters(ctxt->userData,
3284 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003285 line = ctxt->input->line;
3286 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003287 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003288 }
3289 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003290 if (*in == 0xD) {
3291 in++;
3292 if (*in == 0xA) {
3293 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003294 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003295 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003296 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003297 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003298 in--;
3299 }
3300 if (*in == '<') {
3301 return;
3302 }
3303 if (*in == '&') {
3304 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003305 }
3306 SHRINK;
3307 GROW;
3308 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003309 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 nbchar = 0;
3311 }
Daniel Veillard50582112001-03-26 22:52:16 +00003312 ctxt->input->line = line;
3313 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003314 xmlParseCharDataComplex(ctxt, cdata);
3315}
3316
Daniel Veillard01c13b52002-12-10 15:19:08 +00003317/**
3318 * xmlParseCharDataComplex:
3319 * @ctxt: an XML parser context
3320 * @cdata: int indicating whether we are within a CDATA section
3321 *
3322 * parse a CharData section.this is the fallback function
3323 * of xmlParseCharData() when the parsing requires handling
3324 * of non-ASCII characters.
3325 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003326void
3327xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003328 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3329 int nbchar = 0;
3330 int cur, l;
3331 int count = 0;
3332
3333 SHRINK;
3334 GROW;
3335 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003336 while ((cur != '<') && /* checked */
3337 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003338 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003339 if ((cur == ']') && (NXT(1) == ']') &&
3340 (NXT(2) == '>')) {
3341 if (cdata) break;
3342 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003343 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003344 }
3345 }
3346 COPY_BUF(l,buf,nbchar,cur);
3347 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003348 buf[nbchar] = 0;
3349
Owen Taylor3473f882001-02-23 17:55:21 +00003350 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003351 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003352 */
3353 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003354 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003355 if (ctxt->sax->ignorableWhitespace != NULL)
3356 ctxt->sax->ignorableWhitespace(ctxt->userData,
3357 buf, nbchar);
3358 } else {
3359 if (ctxt->sax->characters != NULL)
3360 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3361 }
3362 }
3363 nbchar = 0;
3364 }
3365 count++;
3366 if (count > 50) {
3367 GROW;
3368 count = 0;
3369 }
3370 NEXTL(l);
3371 cur = CUR_CHAR(l);
3372 }
3373 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003374 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003375 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003376 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003377 */
3378 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003379 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003380 if (ctxt->sax->ignorableWhitespace != NULL)
3381 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3382 } else {
3383 if (ctxt->sax->characters != NULL)
3384 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3385 }
3386 }
3387 }
3388}
3389
3390/**
3391 * xmlParseExternalID:
3392 * @ctxt: an XML parser context
3393 * @publicID: a xmlChar** receiving PubidLiteral
3394 * @strict: indicate whether we should restrict parsing to only
3395 * production [75], see NOTE below
3396 *
3397 * Parse an External ID or a Public ID
3398 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003399 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003400 * 'PUBLIC' S PubidLiteral S SystemLiteral
3401 *
3402 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3403 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3404 *
3405 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3406 *
3407 * Returns the function returns SystemLiteral and in the second
3408 * case publicID receives PubidLiteral, is strict is off
3409 * it is possible to return NULL and have publicID set.
3410 */
3411
3412xmlChar *
3413xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3414 xmlChar *URI = NULL;
3415
3416 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003417
3418 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003419 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003420 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003421 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3423 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003424 }
3425 SKIP_BLANKS;
3426 URI = xmlParseSystemLiteral(ctxt);
3427 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003428 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003429 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003430 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003431 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003432 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003434 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003435 }
3436 SKIP_BLANKS;
3437 *publicID = xmlParsePubidLiteral(ctxt);
3438 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003439 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003440 }
3441 if (strict) {
3442 /*
3443 * We don't handle [83] so "S SystemLiteral" is required.
3444 */
William M. Brack76e95df2003-10-18 16:20:14 +00003445 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003446 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003447 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003448 }
3449 } else {
3450 /*
3451 * We handle [83] so we return immediately, if
3452 * "S SystemLiteral" is not detected. From a purely parsing
3453 * point of view that's a nice mess.
3454 */
3455 const xmlChar *ptr;
3456 GROW;
3457
3458 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003459 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003460
William M. Brack76e95df2003-10-18 16:20:14 +00003461 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003462 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3463 }
3464 SKIP_BLANKS;
3465 URI = xmlParseSystemLiteral(ctxt);
3466 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003467 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003468 }
3469 }
3470 return(URI);
3471}
3472
3473/**
3474 * xmlParseComment:
3475 * @ctxt: an XML parser context
3476 *
3477 * Skip an XML (SGML) comment <!-- .... -->
3478 * The spec says that "For compatibility, the string "--" (double-hyphen)
3479 * must not occur within comments. "
3480 *
3481 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3482 */
3483void
3484xmlParseComment(xmlParserCtxtPtr ctxt) {
3485 xmlChar *buf = NULL;
3486 int len;
3487 int size = XML_PARSER_BUFFER_SIZE;
3488 int q, ql;
3489 int r, rl;
3490 int cur, l;
3491 xmlParserInputState state;
3492 xmlParserInputPtr input = ctxt->input;
3493 int count = 0;
3494
3495 /*
3496 * Check that there is a comment right here.
3497 */
3498 if ((RAW != '<') || (NXT(1) != '!') ||
3499 (NXT(2) != '-') || (NXT(3) != '-')) return;
3500
3501 state = ctxt->instate;
3502 ctxt->instate = XML_PARSER_COMMENT;
3503 SHRINK;
3504 SKIP(4);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003505 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003506 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003507 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003508 ctxt->instate = state;
3509 return;
3510 }
3511 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003512 if (q == 0)
3513 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 NEXTL(ql);
3515 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003516 if (r == 0)
3517 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003518 NEXTL(rl);
3519 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003520 if (cur == 0)
3521 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003522 len = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003523 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003524 ((cur != '>') ||
3525 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003526 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003527 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003528 }
3529 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003530 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003531 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003532 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3533 if (new_buf == NULL) {
3534 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003535 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003536 ctxt->instate = state;
3537 return;
3538 }
William M. Bracka3215c72004-07-31 16:24:01 +00003539 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003540 }
3541 COPY_BUF(ql,buf,len,q);
3542 q = r;
3543 ql = rl;
3544 r = cur;
3545 rl = l;
3546
3547 count++;
3548 if (count > 50) {
3549 GROW;
3550 count = 0;
3551 }
3552 NEXTL(l);
3553 cur = CUR_CHAR(l);
3554 if (cur == 0) {
3555 SHRINK;
3556 GROW;
3557 cur = CUR_CHAR(l);
3558 }
3559 }
3560 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003561 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003562 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003563 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 xmlFree(buf);
3565 } else {
3566 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003567 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3568 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 NEXT;
3571 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3572 (!ctxt->disableSAX))
3573 ctxt->sax->comment(ctxt->userData, buf);
3574 xmlFree(buf);
3575 }
3576 ctxt->instate = state;
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003577 return;
3578not_terminated:
3579 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3580 "Comment not terminated\n", NULL);
3581 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003582}
3583
3584/**
3585 * xmlParsePITarget:
3586 * @ctxt: an XML parser context
3587 *
3588 * parse the name of a PI
3589 *
3590 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3591 *
3592 * Returns the PITarget name or NULL
3593 */
3594
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003595const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003596xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003597 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003598
3599 name = xmlParseName(ctxt);
3600 if ((name != NULL) &&
3601 ((name[0] == 'x') || (name[0] == 'X')) &&
3602 ((name[1] == 'm') || (name[1] == 'M')) &&
3603 ((name[2] == 'l') || (name[2] == 'L'))) {
3604 int i;
3605 if ((name[0] == 'x') && (name[1] == 'm') &&
3606 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003607 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003608 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003609 return(name);
3610 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003611 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003612 return(name);
3613 }
3614 for (i = 0;;i++) {
3615 if (xmlW3CPIs[i] == NULL) break;
3616 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3617 return(name);
3618 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003619 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3620 "xmlParsePITarget: invalid name prefix 'xml'\n",
3621 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003622 }
3623 return(name);
3624}
3625
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted URL, optional blanks. A missing
 * '=' makes the function return silently; any other deviation produces
 * an XML_WAR_CATALOG_PI warning and the catalog list is left untouched.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;

    for (p += 7; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;

    for (p++; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto syntax_error;

    /* the URL runs from just after the quote to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    uri = xmlStrndup(start, p - start);

    /* only blanks may follow the closing quote */
    for (p++; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (uri == NULL)
        return;
    ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
    xmlFree(uri);
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3687
Owen Taylor3473f882001-02-23 17:55:21 +00003688/**
3689 * xmlParsePI:
3690 * @ctxt: an XML parser context
3691 *
3692 * parse an XML Processing Instruction.
3693 *
3694 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3695 *
3696 * The processing is transfered to SAX once parsed.
3697 */
3698
3699void
3700xmlParsePI(xmlParserCtxtPtr ctxt) {
3701 xmlChar *buf = NULL;
3702 int len = 0;
3703 int size = XML_PARSER_BUFFER_SIZE;
3704 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003705 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003706 xmlParserInputState state;
3707 int count = 0;
3708
3709 if ((RAW == '<') && (NXT(1) == '?')) {
3710 xmlParserInputPtr input = ctxt->input;
3711 state = ctxt->instate;
3712 ctxt->instate = XML_PARSER_PI;
3713 /*
3714 * this is a Processing Instruction.
3715 */
3716 SKIP(2);
3717 SHRINK;
3718
3719 /*
3720 * Parse the target name and check for special support like
3721 * namespace.
3722 */
3723 target = xmlParsePITarget(ctxt);
3724 if (target != NULL) {
3725 if ((RAW == '?') && (NXT(1) == '>')) {
3726 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003727 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3728 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 SKIP(2);
3731
3732 /*
3733 * SAX: PI detected.
3734 */
3735 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3736 (ctxt->sax->processingInstruction != NULL))
3737 ctxt->sax->processingInstruction(ctxt->userData,
3738 target, NULL);
3739 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003740 return;
3741 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003742 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003743 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003744 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 ctxt->instate = state;
3746 return;
3747 }
3748 cur = CUR;
3749 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003750 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3751 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 SKIP_BLANKS;
3754 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003755 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003756 ((cur != '?') || (NXT(1) != '>'))) {
3757 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003758 xmlChar *tmp;
3759
Owen Taylor3473f882001-02-23 17:55:21 +00003760 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003761 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3762 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003763 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003764 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003765 ctxt->instate = state;
3766 return;
3767 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003768 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 count++;
3771 if (count > 50) {
3772 GROW;
3773 count = 0;
3774 }
3775 COPY_BUF(l,buf,len,cur);
3776 NEXTL(l);
3777 cur = CUR_CHAR(l);
3778 if (cur == 0) {
3779 SHRINK;
3780 GROW;
3781 cur = CUR_CHAR(l);
3782 }
3783 }
3784 buf[len] = 0;
3785 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003786 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3787 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003788 } else {
3789 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003790 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3791 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003792 }
3793 SKIP(2);
3794
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003795#ifdef LIBXML_CATALOG_ENABLED
3796 if (((state == XML_PARSER_MISC) ||
3797 (state == XML_PARSER_START)) &&
3798 (xmlStrEqual(target, XML_CATALOG_PI))) {
3799 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3800 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3801 (allow == XML_CATA_ALLOW_ALL))
3802 xmlParseCatalogPI(ctxt, buf);
3803 }
3804#endif
3805
3806
Owen Taylor3473f882001-02-23 17:55:21 +00003807 /*
3808 * SAX: PI detected.
3809 */
3810 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3811 (ctxt->sax->processingInstruction != NULL))
3812 ctxt->sax->processingInstruction(ctxt->userData,
3813 target, buf);
3814 }
3815 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003816 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003817 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003818 }
3819 ctxt->instate = state;
3820 }
3821}
3822
3823/**
3824 * xmlParseNotationDecl:
3825 * @ctxt: an XML parser context
3826 *
3827 * parse a notation declaration
3828 *
3829 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3830 *
3831 * Hence there is actually 3 choices:
3832 * 'PUBLIC' S PubidLiteral
3833 * 'PUBLIC' S PubidLiteral S SystemLiteral
3834 * and 'SYSTEM' S SystemLiteral
3835 *
3836 * See the NOTE on xmlParseExternalID().
3837 */
3838
3839void
3840xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003841 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003842 xmlChar *Pubid;
3843 xmlChar *Systemid;
3844
Daniel Veillarda07050d2003-10-19 14:46:32 +00003845 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003846 xmlParserInputPtr input = ctxt->input;
3847 SHRINK;
3848 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003849 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003850 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3851 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003852 return;
3853 }
3854 SKIP_BLANKS;
3855
Daniel Veillard76d66f42001-05-16 21:05:17 +00003856 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003858 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003859 return;
3860 }
William M. Brack76e95df2003-10-18 16:20:14 +00003861 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003862 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003863 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003864 return;
3865 }
3866 SKIP_BLANKS;
3867
3868 /*
3869 * Parse the IDs.
3870 */
3871 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3872 SKIP_BLANKS;
3873
3874 if (RAW == '>') {
3875 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003876 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3877 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003878 }
3879 NEXT;
3880 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3881 (ctxt->sax->notationDecl != NULL))
3882 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3883 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003884 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003885 }
Owen Taylor3473f882001-02-23 17:55:21 +00003886 if (Systemid != NULL) xmlFree(Systemid);
3887 if (Pubid != NULL) xmlFree(Pubid);
3888 }
3889}
3890
3891/**
3892 * xmlParseEntityDecl:
3893 * @ctxt: an XML parser context
3894 *
3895 * parse <!ENTITY declarations
3896 *
3897 * [70] EntityDecl ::= GEDecl | PEDecl
3898 *
3899 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3900 *
3901 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3902 *
3903 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3904 *
3905 * [74] PEDef ::= EntityValue | ExternalID
3906 *
3907 * [76] NDataDecl ::= S 'NDATA' S Name
3908 *
3909 * [ VC: Notation Declared ]
3910 * The Name must match the declared name of a notation.
3911 */
3912
3913void
3914xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003915 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003916 xmlChar *value = NULL;
3917 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003918 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003919 int isParameter = 0;
3920 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003921 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00003922
3923 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003924 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003925 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00003926 SHRINK;
3927 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00003928 skipped = SKIP_BLANKS;
3929 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3931 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003932 }
Owen Taylor3473f882001-02-23 17:55:21 +00003933
3934 if (RAW == '%') {
3935 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00003936 skipped = SKIP_BLANKS;
3937 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3939 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003940 }
Owen Taylor3473f882001-02-23 17:55:21 +00003941 isParameter = 1;
3942 }
3943
Daniel Veillard76d66f42001-05-16 21:05:17 +00003944 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003945 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003946 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
3947 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003948 return;
3949 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00003950 skipped = SKIP_BLANKS;
3951 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3953 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003954 }
Owen Taylor3473f882001-02-23 17:55:21 +00003955
Daniel Veillardf5582f12002-06-11 10:08:16 +00003956 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00003957 /*
3958 * handle the various case of definitions...
3959 */
3960 if (isParameter) {
3961 if ((RAW == '"') || (RAW == '\'')) {
3962 value = xmlParseEntityValue(ctxt, &orig);
3963 if (value) {
3964 if ((ctxt->sax != NULL) &&
3965 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3966 ctxt->sax->entityDecl(ctxt->userData, name,
3967 XML_INTERNAL_PARAMETER_ENTITY,
3968 NULL, NULL, value);
3969 }
3970 } else {
3971 URI = xmlParseExternalID(ctxt, &literal, 1);
3972 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003973 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003974 }
3975 if (URI) {
3976 xmlURIPtr uri;
3977
3978 uri = xmlParseURI((const char *) URI);
3979 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00003980 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
3981 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003982 /*
3983 * This really ought to be a well formedness error
3984 * but the XML Core WG decided otherwise c.f. issue
3985 * E26 of the XML erratas.
3986 */
Owen Taylor3473f882001-02-23 17:55:21 +00003987 } else {
3988 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003989 /*
3990 * Okay this is foolish to block those but not
3991 * invalid URIs.
3992 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003993 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003994 } else {
3995 if ((ctxt->sax != NULL) &&
3996 (!ctxt->disableSAX) &&
3997 (ctxt->sax->entityDecl != NULL))
3998 ctxt->sax->entityDecl(ctxt->userData, name,
3999 XML_EXTERNAL_PARAMETER_ENTITY,
4000 literal, URI, NULL);
4001 }
4002 xmlFreeURI(uri);
4003 }
4004 }
4005 }
4006 } else {
4007 if ((RAW == '"') || (RAW == '\'')) {
4008 value = xmlParseEntityValue(ctxt, &orig);
4009 if ((ctxt->sax != NULL) &&
4010 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4011 ctxt->sax->entityDecl(ctxt->userData, name,
4012 XML_INTERNAL_GENERAL_ENTITY,
4013 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004014 /*
4015 * For expat compatibility in SAX mode.
4016 */
4017 if ((ctxt->myDoc == NULL) ||
4018 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4019 if (ctxt->myDoc == NULL) {
4020 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4021 }
4022 if (ctxt->myDoc->intSubset == NULL)
4023 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4024 BAD_CAST "fake", NULL, NULL);
4025
Daniel Veillard1af9a412003-08-20 22:54:39 +00004026 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4027 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004028 }
Owen Taylor3473f882001-02-23 17:55:21 +00004029 } else {
4030 URI = xmlParseExternalID(ctxt, &literal, 1);
4031 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004032 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004033 }
4034 if (URI) {
4035 xmlURIPtr uri;
4036
4037 uri = xmlParseURI((const char *)URI);
4038 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004039 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4040 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004041 /*
4042 * This really ought to be a well formedness error
4043 * but the XML Core WG decided otherwise c.f. issue
4044 * E26 of the XML erratas.
4045 */
Owen Taylor3473f882001-02-23 17:55:21 +00004046 } else {
4047 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004048 /*
4049 * Okay this is foolish to block those but not
4050 * invalid URIs.
4051 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004052 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004053 }
4054 xmlFreeURI(uri);
4055 }
4056 }
William M. Brack76e95df2003-10-18 16:20:14 +00004057 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004058 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4059 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004060 }
4061 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004062 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004063 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004064 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004065 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4066 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004067 }
4068 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004069 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4071 (ctxt->sax->unparsedEntityDecl != NULL))
4072 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4073 literal, URI, ndata);
4074 } else {
4075 if ((ctxt->sax != NULL) &&
4076 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4077 ctxt->sax->entityDecl(ctxt->userData, name,
4078 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4079 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004080 /*
4081 * For expat compatibility in SAX mode.
4082 * assuming the entity repalcement was asked for
4083 */
4084 if ((ctxt->replaceEntities != 0) &&
4085 ((ctxt->myDoc == NULL) ||
4086 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4087 if (ctxt->myDoc == NULL) {
4088 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4089 }
4090
4091 if (ctxt->myDoc->intSubset == NULL)
4092 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4093 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004094 xmlSAX2EntityDecl(ctxt, name,
4095 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4096 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004097 }
Owen Taylor3473f882001-02-23 17:55:21 +00004098 }
4099 }
4100 }
4101 SKIP_BLANKS;
4102 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004103 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004104 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004105 } else {
4106 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004107 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4108 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004109 }
4110 NEXT;
4111 }
4112 if (orig != NULL) {
4113 /*
4114 * Ugly mechanism to save the raw entity value.
4115 */
4116 xmlEntityPtr cur = NULL;
4117
4118 if (isParameter) {
4119 if ((ctxt->sax != NULL) &&
4120 (ctxt->sax->getParameterEntity != NULL))
4121 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4122 } else {
4123 if ((ctxt->sax != NULL) &&
4124 (ctxt->sax->getEntity != NULL))
4125 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004126 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004127 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004128 }
Owen Taylor3473f882001-02-23 17:55:21 +00004129 }
4130 if (cur != NULL) {
4131 if (cur->orig != NULL)
4132 xmlFree(orig);
4133 else
4134 cur->orig = orig;
4135 } else
4136 xmlFree(orig);
4137 }
Owen Taylor3473f882001-02-23 17:55:21 +00004138 if (value != NULL) xmlFree(value);
4139 if (URI != NULL) xmlFree(URI);
4140 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004141 }
4142}
4143
4144/**
4145 * xmlParseDefaultDecl:
4146 * @ctxt: an XML parser context
4147 * @value: Receive a possible fixed default value for the attribute
4148 *
4149 * Parse an attribute default declaration
4150 *
4151 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4152 *
4153 * [ VC: Required Attribute ]
4154 * if the default declaration is the keyword #REQUIRED, then the
4155 * attribute must be specified for all elements of the type in the
4156 * attribute-list declaration.
4157 *
4158 * [ VC: Attribute Default Legal ]
4159 * The declared default value must meet the lexical constraints of
4160 * the declared attribute type c.f. xmlValidateAttributeDecl()
4161 *
4162 * [ VC: Fixed Attribute Default ]
4163 * if an attribute has a default value declared with the #FIXED
4164 * keyword, instances of that attribute must match the default value.
4165 *
4166 * [ WFC: No < in Attribute Values ]
4167 * handled in xmlParseAttValue()
4168 *
4169 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4170 * or XML_ATTRIBUTE_FIXED.
4171 */
4172
4173int
4174xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4175 int val;
4176 xmlChar *ret;
4177
4178 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004179 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004180 SKIP(9);
4181 return(XML_ATTRIBUTE_REQUIRED);
4182 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004183 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004184 SKIP(8);
4185 return(XML_ATTRIBUTE_IMPLIED);
4186 }
4187 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004188 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004189 SKIP(6);
4190 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004191 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004192 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4193 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004194 }
4195 SKIP_BLANKS;
4196 }
4197 ret = xmlParseAttValue(ctxt);
4198 ctxt->instate = XML_PARSER_DTD;
4199 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004200 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004201 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004202 } else
4203 *value = ret;
4204 return(val);
4205}
4206
4207/**
4208 * xmlParseNotationType:
4209 * @ctxt: an XML parser context
4210 *
4211 * parse an Notation attribute type.
4212 *
4213 * Note: the leading 'NOTATION' S part has already being parsed...
4214 *
4215 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4216 *
4217 * [ VC: Notation Attributes ]
4218 * Values of this type must match one of the notation names included
4219 * in the declaration; all notation names in the declaration must be declared.
4220 *
4221 * Returns: the notation attribute tree built while parsing
4222 */
4223
4224xmlEnumerationPtr
4225xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004226 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004227 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4228
4229 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004230 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004231 return(NULL);
4232 }
4233 SHRINK;
4234 do {
4235 NEXT;
4236 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004237 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004239 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4240 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004241 return(ret);
4242 }
4243 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004244 if (cur == NULL) return(ret);
4245 if (last == NULL) ret = last = cur;
4246 else {
4247 last->next = cur;
4248 last = cur;
4249 }
4250 SKIP_BLANKS;
4251 } while (RAW == '|');
4252 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004253 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004254 if ((last != NULL) && (last != ret))
4255 xmlFreeEnumeration(last);
4256 return(ret);
4257 }
4258 NEXT;
4259 return(ret);
4260}
4261
4262/**
4263 * xmlParseEnumerationType:
4264 * @ctxt: an XML parser context
4265 *
4266 * parse an Enumeration attribute type.
4267 *
4268 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4269 *
4270 * [ VC: Enumeration ]
4271 * Values of this type must match one of the Nmtoken tokens in
4272 * the declaration
4273 *
4274 * Returns: the enumeration attribute tree built while parsing
4275 */
4276
4277xmlEnumerationPtr
4278xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4279 xmlChar *name;
4280 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4281
4282 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004283 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004284 return(NULL);
4285 }
4286 SHRINK;
4287 do {
4288 NEXT;
4289 SKIP_BLANKS;
4290 name = xmlParseNmtoken(ctxt);
4291 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004292 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004293 return(ret);
4294 }
4295 cur = xmlCreateEnumeration(name);
4296 xmlFree(name);
4297 if (cur == NULL) return(ret);
4298 if (last == NULL) ret = last = cur;
4299 else {
4300 last->next = cur;
4301 last = cur;
4302 }
4303 SKIP_BLANKS;
4304 } while (RAW == '|');
4305 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004306 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004307 return(ret);
4308 }
4309 NEXT;
4310 return(ret);
4311}
4312
4313/**
4314 * xmlParseEnumeratedType:
4315 * @ctxt: an XML parser context
4316 * @tree: the enumeration tree built while parsing
4317 *
4318 * parse an Enumerated attribute type.
4319 *
4320 * [57] EnumeratedType ::= NotationType | Enumeration
4321 *
4322 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4323 *
4324 *
4325 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4326 */
4327
4328int
4329xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004330 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004331 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004332 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004333 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4334 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004335 return(0);
4336 }
4337 SKIP_BLANKS;
4338 *tree = xmlParseNotationType(ctxt);
4339 if (*tree == NULL) return(0);
4340 return(XML_ATTRIBUTE_NOTATION);
4341 }
4342 *tree = xmlParseEnumerationType(ctxt);
4343 if (*tree == NULL) return(0);
4344 return(XML_ATTRIBUTE_ENUMERATION);
4345}
4346
4347/**
4348 * xmlParseAttributeType:
4349 * @ctxt: an XML parser context
4350 * @tree: the enumeration tree built while parsing
4351 *
4352 * parse the Attribute list def for an element
4353 *
4354 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4355 *
4356 * [55] StringType ::= 'CDATA'
4357 *
4358 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4359 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4360 *
4361 * Validity constraints for attribute values syntax are checked in
4362 * xmlValidateAttributeValue()
4363 *
4364 * [ VC: ID ]
4365 * Values of type ID must match the Name production. A name must not
4366 * appear more than once in an XML document as a value of this type;
4367 * i.e., ID values must uniquely identify the elements which bear them.
4368 *
4369 * [ VC: One ID per Element Type ]
4370 * No element type may have more than one ID attribute specified.
4371 *
4372 * [ VC: ID Attribute Default ]
4373 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4374 *
4375 * [ VC: IDREF ]
4376 * Values of type IDREF must match the Name production, and values
4377 * of type IDREFS must match Names; each IDREF Name must match the value
4378 * of an ID attribute on some element in the XML document; i.e. IDREF
4379 * values must match the value of some ID attribute.
4380 *
4381 * [ VC: Entity Name ]
4382 * Values of type ENTITY must match the Name production, values
4383 * of type ENTITIES must match Names; each Entity Name must match the
4384 * name of an unparsed entity declared in the DTD.
4385 *
4386 * [ VC: Name Token ]
4387 * Values of type NMTOKEN must match the Nmtoken production; values
4388 * of type NMTOKENS must match Nmtokens.
4389 *
4390 * Returns the attribute type
4391 */
4392int
4393xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4394 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004395 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004396 SKIP(5);
4397 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004398 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004399 SKIP(6);
4400 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004401 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004402 SKIP(5);
4403 return(XML_ATTRIBUTE_IDREF);
4404 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4405 SKIP(2);
4406 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004407 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004408 SKIP(6);
4409 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004410 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004411 SKIP(8);
4412 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004413 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004414 SKIP(8);
4415 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004416 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004417 SKIP(7);
4418 return(XML_ATTRIBUTE_NMTOKEN);
4419 }
4420 return(xmlParseEnumeratedType(ctxt, tree));
4421}
4422
4423/**
4424 * xmlParseAttributeListDecl:
4425 * @ctxt: an XML parser context
4426 *
4427 * : parse the Attribute list def for an element
4428 *
4429 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4430 *
4431 * [53] AttDef ::= S Name S AttType S DefaultDecl
4432 *
4433 */
4434void
4435xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004436 const xmlChar *elemName;
4437 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004438 xmlEnumerationPtr tree;
4439
Daniel Veillarda07050d2003-10-19 14:46:32 +00004440 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004441 xmlParserInputPtr input = ctxt->input;
4442
4443 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004444 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004445 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004446 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004447 }
4448 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004449 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004450 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4452 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004453 return;
4454 }
4455 SKIP_BLANKS;
4456 GROW;
4457 while (RAW != '>') {
4458 const xmlChar *check = CUR_PTR;
4459 int type;
4460 int def;
4461 xmlChar *defaultValue = NULL;
4462
4463 GROW;
4464 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004465 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004466 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004467 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4468 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004469 break;
4470 }
4471 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004472 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004474 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004475 if (defaultValue != NULL)
4476 xmlFree(defaultValue);
4477 break;
4478 }
4479 SKIP_BLANKS;
4480
4481 type = xmlParseAttributeType(ctxt, &tree);
4482 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004483 if (defaultValue != NULL)
4484 xmlFree(defaultValue);
4485 break;
4486 }
4487
4488 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004489 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004490 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4491 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004492 if (defaultValue != NULL)
4493 xmlFree(defaultValue);
4494 if (tree != NULL)
4495 xmlFreeEnumeration(tree);
4496 break;
4497 }
4498 SKIP_BLANKS;
4499
4500 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4501 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004502 if (defaultValue != NULL)
4503 xmlFree(defaultValue);
4504 if (tree != NULL)
4505 xmlFreeEnumeration(tree);
4506 break;
4507 }
4508
4509 GROW;
4510 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004511 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004513 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004514 if (defaultValue != NULL)
4515 xmlFree(defaultValue);
4516 if (tree != NULL)
4517 xmlFreeEnumeration(tree);
4518 break;
4519 }
4520 SKIP_BLANKS;
4521 }
4522 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004523 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4524 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004525 if (defaultValue != NULL)
4526 xmlFree(defaultValue);
4527 if (tree != NULL)
4528 xmlFreeEnumeration(tree);
4529 break;
4530 }
4531 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4532 (ctxt->sax->attributeDecl != NULL))
4533 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4534 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004535 else if (tree != NULL)
4536 xmlFreeEnumeration(tree);
4537
4538 if ((ctxt->sax2) && (defaultValue != NULL) &&
4539 (def != XML_ATTRIBUTE_IMPLIED) &&
4540 (def != XML_ATTRIBUTE_REQUIRED)) {
4541 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4542 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004543 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4544 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4545 }
Owen Taylor3473f882001-02-23 17:55:21 +00004546 if (defaultValue != NULL)
4547 xmlFree(defaultValue);
4548 GROW;
4549 }
4550 if (RAW == '>') {
4551 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004552 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4553 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004554 }
4555 NEXT;
4556 }
Owen Taylor3473f882001-02-23 17:55:21 +00004557 }
4558}
4559
4560/**
4561 * xmlParseElementMixedContentDecl:
4562 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004563 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004564 *
4565 * parse the declaration for a Mixed Element content
4566 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4567 *
4568 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4569 * '(' S? '#PCDATA' S? ')'
4570 *
4571 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4572 *
4573 * [ VC: No Duplicate Types ]
4574 * The same name must not appear more than once in a single
4575 * mixed-content declaration.
4576 *
4577 * returns: the list of the xmlElementContentPtr describing the element choices
4578 */
4579xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004580xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004581 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004582 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004583
4584 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004585 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004586 SKIP(7);
4587 SKIP_BLANKS;
4588 SHRINK;
4589 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004590 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004591 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4592"Element content declaration doesn't start and stop in the same entity\n",
4593 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004594 }
Owen Taylor3473f882001-02-23 17:55:21 +00004595 NEXT;
4596 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4597 if (RAW == '*') {
4598 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4599 NEXT;
4600 }
4601 return(ret);
4602 }
4603 if ((RAW == '(') || (RAW == '|')) {
4604 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4605 if (ret == NULL) return(NULL);
4606 }
4607 while (RAW == '|') {
4608 NEXT;
4609 if (elem == NULL) {
4610 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4611 if (ret == NULL) return(NULL);
4612 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004613 if (cur != NULL)
4614 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004615 cur = ret;
4616 } else {
4617 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4618 if (n == NULL) return(NULL);
4619 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004620 if (n->c1 != NULL)
4621 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004623 if (n != NULL)
4624 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004625 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004626 }
4627 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004628 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004629 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004630 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004631 "xmlParseElementMixedContentDecl : Name expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004632 xmlFreeElementContent(cur);
4633 return(NULL);
4634 }
4635 SKIP_BLANKS;
4636 GROW;
4637 }
4638 if ((RAW == ')') && (NXT(1) == '*')) {
4639 if (elem != NULL) {
4640 cur->c2 = xmlNewElementContent(elem,
4641 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004642 if (cur->c2 != NULL)
4643 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004644 }
4645 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004646 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004647 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4648"Element content declaration doesn't start and stop in the same entity\n",
4649 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004650 }
Owen Taylor3473f882001-02-23 17:55:21 +00004651 SKIP(2);
4652 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00004653 xmlFreeElementContent(ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004654 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004655 return(NULL);
4656 }
4657
4658 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004659 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004660 }
4661 return(ret);
4662}
4663
4664/**
4665 * xmlParseElementChildrenContentDecl:
4666 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004667 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004668 *
 * parse the declaration for the children content model of an element
4670 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4671 *
4672 *
4673 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4674 *
4675 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4676 *
4677 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4678 *
4679 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4680 *
4681 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4682 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004683 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004684 * opening or closing parentheses in a choice, seq, or Mixed
4685 * construct is contained in the replacement text for a parameter
4686 * entity, both must be contained in the same replacement text. For
4687 * interoperability, if a parameter-entity reference appears in a
4688 * choice, seq, or Mixed construct, its replacement text should not
4689 * be empty, and neither the first nor last non-blank character of
4690 * the replacement text should be a connector (| or ,).
4691 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004692 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004693 * hierarchy.
4694 */
4695xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004696xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004697 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004698 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004699 xmlChar type = 0;
4700
4701 SKIP_BLANKS;
4702 GROW;
4703 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004704 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004705
Owen Taylor3473f882001-02-23 17:55:21 +00004706 /* Recurse on first child */
4707 NEXT;
4708 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004709 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004710 SKIP_BLANKS;
4711 GROW;
4712 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004713 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004714 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004715 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004716 return(NULL);
4717 }
4718 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004719 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004720 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004721 return(NULL);
4722 }
Owen Taylor3473f882001-02-23 17:55:21 +00004723 GROW;
4724 if (RAW == '?') {
4725 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4726 NEXT;
4727 } else if (RAW == '*') {
4728 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4729 NEXT;
4730 } else if (RAW == '+') {
4731 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4732 NEXT;
4733 } else {
4734 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4735 }
Owen Taylor3473f882001-02-23 17:55:21 +00004736 GROW;
4737 }
4738 SKIP_BLANKS;
4739 SHRINK;
4740 while (RAW != ')') {
4741 /*
4742 * Each loop we parse one separator and one element.
4743 */
4744 if (RAW == ',') {
4745 if (type == 0) type = CUR;
4746
4747 /*
4748 * Detect "Name | Name , Name" error
4749 */
4750 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004751 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004752 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004753 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004754 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004755 xmlFreeElementContent(last);
4756 if (ret != NULL)
4757 xmlFreeElementContent(ret);
4758 return(NULL);
4759 }
4760 NEXT;
4761
4762 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4763 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004764 if ((last != NULL) && (last != ret))
4765 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004766 xmlFreeElementContent(ret);
4767 return(NULL);
4768 }
4769 if (last == NULL) {
4770 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004771 if (ret != NULL)
4772 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004773 ret = cur = op;
4774 } else {
4775 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004776 if (op != NULL)
4777 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004778 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004779 if (last != NULL)
4780 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004781 cur =op;
4782 last = NULL;
4783 }
4784 } else if (RAW == '|') {
4785 if (type == 0) type = CUR;
4786
4787 /*
4788 * Detect "Name , Name | Name" error
4789 */
4790 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004791 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004792 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004793 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004794 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004795 xmlFreeElementContent(last);
4796 if (ret != NULL)
4797 xmlFreeElementContent(ret);
4798 return(NULL);
4799 }
4800 NEXT;
4801
4802 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4803 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004804 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004805 xmlFreeElementContent(last);
4806 if (ret != NULL)
4807 xmlFreeElementContent(ret);
4808 return(NULL);
4809 }
4810 if (last == NULL) {
4811 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004812 if (ret != NULL)
4813 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004814 ret = cur = op;
4815 } else {
4816 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004817 if (op != NULL)
4818 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004819 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004820 if (last != NULL)
4821 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004822 cur =op;
4823 last = NULL;
4824 }
4825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004826 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004827 if (ret != NULL)
4828 xmlFreeElementContent(ret);
4829 return(NULL);
4830 }
4831 GROW;
4832 SKIP_BLANKS;
4833 GROW;
4834 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004835 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004836 /* Recurse on second child */
4837 NEXT;
4838 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004839 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004840 SKIP_BLANKS;
4841 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004842 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004843 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004844 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004845 if (ret != NULL)
4846 xmlFreeElementContent(ret);
4847 return(NULL);
4848 }
4849 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004850 if (RAW == '?') {
4851 last->ocur = XML_ELEMENT_CONTENT_OPT;
4852 NEXT;
4853 } else if (RAW == '*') {
4854 last->ocur = XML_ELEMENT_CONTENT_MULT;
4855 NEXT;
4856 } else if (RAW == '+') {
4857 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4858 NEXT;
4859 } else {
4860 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4861 }
4862 }
4863 SKIP_BLANKS;
4864 GROW;
4865 }
4866 if ((cur != NULL) && (last != NULL)) {
4867 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004868 if (last != NULL)
4869 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004870 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004871 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004872 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4873"Element content declaration doesn't start and stop in the same entity\n",
4874 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004875 }
Owen Taylor3473f882001-02-23 17:55:21 +00004876 NEXT;
4877 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004878 if (ret != NULL) {
4879 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
4880 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4881 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4882 else
4883 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4884 }
Owen Taylor3473f882001-02-23 17:55:21 +00004885 NEXT;
4886 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004887 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004888 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004889 cur = ret;
4890 /*
4891 * Some normalization:
4892 * (a | b* | c?)* == (a | b | c)*
4893 */
4894 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4895 if ((cur->c1 != NULL) &&
4896 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4897 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4898 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4899 if ((cur->c2 != NULL) &&
4900 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4901 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4902 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4903 cur = cur->c2;
4904 }
4905 }
Owen Taylor3473f882001-02-23 17:55:21 +00004906 NEXT;
4907 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004908 if (ret != NULL) {
4909 int found = 0;
4910
William M. Brackf8f2e8f2004-05-14 04:37:41 +00004911 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
4912 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
4913 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00004914 else
4915 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004916 /*
4917 * Some normalization:
4918 * (a | b*)+ == (a | b)*
4919 * (a | b?)+ == (a | b)*
4920 */
4921 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4922 if ((cur->c1 != NULL) &&
4923 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4924 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4925 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4926 found = 1;
4927 }
4928 if ((cur->c2 != NULL) &&
4929 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4930 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4931 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4932 found = 1;
4933 }
4934 cur = cur->c2;
4935 }
4936 if (found)
4937 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4938 }
Owen Taylor3473f882001-02-23 17:55:21 +00004939 NEXT;
4940 }
4941 return(ret);
4942}
4943
4944/**
4945 * xmlParseElementContentDecl:
4946 * @ctxt: an XML parser context
4947 * @name: the name of the element being defined.
4948 * @result: the Element Content pointer will be stored here if any
4949 *
4950 * parse the declaration for an Element content either Mixed or Children,
4951 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4952 *
4953 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4954 *
4955 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4956 */
4957
4958int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004959xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00004960 xmlElementContentPtr *result) {
4961
4962 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004963 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004964 int res;
4965
4966 *result = NULL;
4967
4968 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004969 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004970 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004971 return(-1);
4972 }
4973 NEXT;
4974 GROW;
4975 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004976 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004977 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004978 res = XML_ELEMENT_TYPE_MIXED;
4979 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004980 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004981 res = XML_ELEMENT_TYPE_ELEMENT;
4982 }
Owen Taylor3473f882001-02-23 17:55:21 +00004983 SKIP_BLANKS;
4984 *result = tree;
4985 return(res);
4986}
4987
4988/**
4989 * xmlParseElementDecl:
4990 * @ctxt: an XML parser context
4991 *
4992 * parse an Element declaration.
4993 *
4994 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4995 *
4996 * [ VC: Unique Element Type Declaration ]
4997 * No element type may be declared more than once
4998 *
4999 * Returns the type of the element, or -1 in case of error
5000 */
5001int
5002xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005003 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005004 int ret = -1;
5005 xmlElementContentPtr content = NULL;
5006
5007 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005008 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005009 xmlParserInputPtr input = ctxt->input;
5010
5011 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005012 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5014 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
5016 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005017 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005018 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005019 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5020 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005021 return(-1);
5022 }
5023 while ((RAW == 0) && (ctxt->inputNr > 1))
5024 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005025 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005026 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5027 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005028 }
5029 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005030 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005031 SKIP(5);
5032 /*
5033 * Element must always be empty.
5034 */
5035 ret = XML_ELEMENT_TYPE_EMPTY;
5036 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5037 (NXT(2) == 'Y')) {
5038 SKIP(3);
5039 /*
5040 * Element is a generic container.
5041 */
5042 ret = XML_ELEMENT_TYPE_ANY;
5043 } else if (RAW == '(') {
5044 ret = xmlParseElementContentDecl(ctxt, name, &content);
5045 } else {
5046 /*
5047 * [ WFC: PEs in Internal Subset ] error handling.
5048 */
5049 if ((RAW == '%') && (ctxt->external == 0) &&
5050 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005051 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005052 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005053 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005054 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005055 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5056 }
Owen Taylor3473f882001-02-23 17:55:21 +00005057 return(-1);
5058 }
5059
5060 SKIP_BLANKS;
5061 /*
5062 * Pop-up of finished entities.
5063 */
5064 while ((RAW == 0) && (ctxt->inputNr > 1))
5065 xmlPopInput(ctxt);
5066 SKIP_BLANKS;
5067
5068 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005069 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 } else {
5071 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005072 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5073 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005074 }
5075
5076 NEXT;
5077 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5078 (ctxt->sax->elementDecl != NULL))
5079 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5080 content);
5081 }
5082 if (content != NULL) {
5083 xmlFreeElementContent(content);
5084 }
Owen Taylor3473f882001-02-23 17:55:21 +00005085 }
5086 return(ret);
5087}
5088
5089/**
Owen Taylor3473f882001-02-23 17:55:21 +00005090 * xmlParseConditionalSections
5091 * @ctxt: an XML parser context
5092 *
5093 * [61] conditionalSect ::= includeSect | ignoreSect
5094 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5095 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5096 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5097 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5098 */
5099
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005100static void
Owen Taylor3473f882001-02-23 17:55:21 +00005101xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5102 SKIP(3);
5103 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005104 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005105 SKIP(7);
5106 SKIP_BLANKS;
5107 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005108 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 NEXT;
5111 }
5112 if (xmlParserDebugEntities) {
5113 if ((ctxt->input != NULL) && (ctxt->input->filename))
5114 xmlGenericError(xmlGenericErrorContext,
5115 "%s(%d): ", ctxt->input->filename,
5116 ctxt->input->line);
5117 xmlGenericError(xmlGenericErrorContext,
5118 "Entering INCLUDE Conditional Section\n");
5119 }
5120
5121 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5122 (NXT(2) != '>'))) {
5123 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005124 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005125
5126 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5127 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005128 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005129 NEXT;
5130 } else if (RAW == '%') {
5131 xmlParsePEReference(ctxt);
5132 } else
5133 xmlParseMarkupDecl(ctxt);
5134
5135 /*
5136 * Pop-up of finished entities.
5137 */
5138 while ((RAW == 0) && (ctxt->inputNr > 1))
5139 xmlPopInput(ctxt);
5140
Daniel Veillardfdc91562002-07-01 21:52:03 +00005141 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005142 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005143 break;
5144 }
5145 }
5146 if (xmlParserDebugEntities) {
5147 if ((ctxt->input != NULL) && (ctxt->input->filename))
5148 xmlGenericError(xmlGenericErrorContext,
5149 "%s(%d): ", ctxt->input->filename,
5150 ctxt->input->line);
5151 xmlGenericError(xmlGenericErrorContext,
5152 "Leaving INCLUDE Conditional Section\n");
5153 }
5154
Daniel Veillarda07050d2003-10-19 14:46:32 +00005155 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005156 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005157 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 int depth = 0;
5159
5160 SKIP(6);
5161 SKIP_BLANKS;
5162 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005163 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005164 } else {
5165 NEXT;
5166 }
5167 if (xmlParserDebugEntities) {
5168 if ((ctxt->input != NULL) && (ctxt->input->filename))
5169 xmlGenericError(xmlGenericErrorContext,
5170 "%s(%d): ", ctxt->input->filename,
5171 ctxt->input->line);
5172 xmlGenericError(xmlGenericErrorContext,
5173 "Entering IGNORE Conditional Section\n");
5174 }
5175
5176 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005177 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005178 * But disable SAX event generating DTD building in the meantime
5179 */
5180 state = ctxt->disableSAX;
5181 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005182 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005183 ctxt->instate = XML_PARSER_IGNORE;
5184
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005185 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005186 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5187 depth++;
5188 SKIP(3);
5189 continue;
5190 }
5191 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5192 if (--depth >= 0) SKIP(3);
5193 continue;
5194 }
5195 NEXT;
5196 continue;
5197 }
5198
5199 ctxt->disableSAX = state;
5200 ctxt->instate = instate;
5201
5202 if (xmlParserDebugEntities) {
5203 if ((ctxt->input != NULL) && (ctxt->input->filename))
5204 xmlGenericError(xmlGenericErrorContext,
5205 "%s(%d): ", ctxt->input->filename,
5206 ctxt->input->line);
5207 xmlGenericError(xmlGenericErrorContext,
5208 "Leaving IGNORE Conditional Section\n");
5209 }
5210
5211 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005212 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005213 }
5214
5215 if (RAW == 0)
5216 SHRINK;
5217
5218 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005219 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005220 } else {
5221 SKIP(3);
5222 }
5223}
5224
5225/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005226 * xmlParseMarkupDecl:
5227 * @ctxt: an XML parser context
5228 *
5229 * parse Markup declarations
5230 *
5231 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5232 * NotationDecl | PI | Comment
5233 *
5234 * [ VC: Proper Declaration/PE Nesting ]
5235 * Parameter-entity replacement text must be properly nested with
5236 * markup declarations. That is to say, if either the first character
5237 * or the last character of a markup declaration (markupdecl above) is
5238 * contained in the replacement text for a parameter-entity reference,
5239 * both must be contained in the same replacement text.
5240 *
5241 * [ WFC: PEs in Internal Subset ]
5242 * In the internal DTD subset, parameter-entity references can occur
5243 * only where markup declarations can occur, not within markup declarations.
5244 * (This does not apply to references that occur in external parameter
5245 * entities or to the external subset.)
5246 */
5247void
5248xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5249 GROW;
5250 xmlParseElementDecl(ctxt);
5251 xmlParseAttributeListDecl(ctxt);
5252 xmlParseEntityDecl(ctxt);
5253 xmlParseNotationDecl(ctxt);
5254 xmlParsePI(ctxt);
5255 xmlParseComment(ctxt);
5256 /*
5257 * This is only for internal subset. On external entities,
5258 * the replacement is done before parsing stage
5259 */
5260 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5261 xmlParsePEReference(ctxt);
5262
5263 /*
5264 * Conditional sections are allowed from entities included
5265 * by PE References in the internal subset.
5266 */
5267 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5268 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5269 xmlParseConditionalSections(ctxt);
5270 }
5271 }
5272
5273 ctxt->instate = XML_PARSER_DTD;
5274}
5275
5276/**
5277 * xmlParseTextDecl:
5278 * @ctxt: an XML parser context
5279 *
5280 * parse an XML declaration header for external entities
5281 *
5282 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5283 *
5284 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5285 */
5286
5287void
5288xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5289 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005290 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005291
5292 /*
5293 * We know that '<?xml' is here.
5294 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005295 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005296 SKIP(5);
5297 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005298 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005299 return;
5300 }
5301
William M. Brack76e95df2003-10-18 16:20:14 +00005302 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005303 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5304 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005305 }
5306 SKIP_BLANKS;
5307
5308 /*
5309 * We may have the VersionInfo here.
5310 */
5311 version = xmlParseVersionInfo(ctxt);
5312 if (version == NULL)
5313 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005314 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005315 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5317 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005318 }
5319 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005320 ctxt->input->version = version;
5321
5322 /*
5323 * We must have the encoding declaration
5324 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005325 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005326 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5327 /*
5328 * The XML REC instructs us to stop parsing right here
5329 */
5330 return;
5331 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005332 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5333 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5334 "Missing encoding in text declaration\n");
5335 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005336
5337 SKIP_BLANKS;
5338 if ((RAW == '?') && (NXT(1) == '>')) {
5339 SKIP(2);
5340 } else if (RAW == '>') {
5341 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005342 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005343 NEXT;
5344 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005345 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005346 MOVETO_ENDTAG(CUR_PTR);
5347 NEXT;
5348 }
5349}
5350
5351/**
Owen Taylor3473f882001-02-23 17:55:21 +00005352 * xmlParseExternalSubset:
5353 * @ctxt: an XML parser context
5354 * @ExternalID: the external identifier
5355 * @SystemID: the system identifier (or URL)
5356 *
5357 * parse Markup declarations from an external subset
5358 *
5359 * [30] extSubset ::= textDecl? extSubsetDecl
5360 *
5361 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5362 */
5363void
5364xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5365 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005366 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005367 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005368 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005369 xmlParseTextDecl(ctxt);
5370 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5371 /*
5372 * The XML REC instructs us to stop parsing right here
5373 */
5374 ctxt->instate = XML_PARSER_EOF;
5375 return;
5376 }
5377 }
5378 if (ctxt->myDoc == NULL) {
5379 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5380 }
5381 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5382 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5383
5384 ctxt->instate = XML_PARSER_DTD;
5385 ctxt->external = 1;
5386 while (((RAW == '<') && (NXT(1) == '?')) ||
5387 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005388 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005389 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005390 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 GROW;
5393 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5394 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005395 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005396 NEXT;
5397 } else if (RAW == '%') {
5398 xmlParsePEReference(ctxt);
5399 } else
5400 xmlParseMarkupDecl(ctxt);
5401
5402 /*
5403 * Pop-up of finished entities.
5404 */
5405 while ((RAW == 0) && (ctxt->inputNr > 1))
5406 xmlPopInput(ctxt);
5407
Daniel Veillardfdc91562002-07-01 21:52:03 +00005408 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005409 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005410 break;
5411 }
5412 }
5413
5414 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005415 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005416 }
5417
5418}
5419
5420/**
5421 * xmlParseReference:
5422 * @ctxt: an XML parser context
5423 *
5424 * parse and handle entity references in content, depending on the SAX
5425 * interface, this may end-up in a call to character() if this is a
5426 * CharRef, a predefined entity, if there is no reference() callback.
5427 * or if the parser was asked to switch to that mode.
5428 *
5429 * [67] Reference ::= EntityRef | CharRef
5430 */
5431void
5432xmlParseReference(xmlParserCtxtPtr ctxt) {
5433 xmlEntityPtr ent;
5434 xmlChar *val;
5435 if (RAW != '&') return;
5436
5437 if (NXT(1) == '#') {
5438 int i = 0;
5439 xmlChar out[10];
5440 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005441 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005442
5443 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5444 /*
5445 * So we are using non-UTF-8 buffers
5446 * Check that the char fit on 8bits, if not
5447 * generate a CharRef.
5448 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005449 if (value <= 0xFF) {
5450 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005451 out[1] = 0;
5452 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5453 (!ctxt->disableSAX))
5454 ctxt->sax->characters(ctxt->userData, out, 1);
5455 } else {
5456 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005457 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005458 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005459 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005460 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5461 (!ctxt->disableSAX))
5462 ctxt->sax->reference(ctxt->userData, out);
5463 }
5464 } else {
5465 /*
5466 * Just encode the value in UTF-8
5467 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005468 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005469 out[i] = 0;
5470 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5471 (!ctxt->disableSAX))
5472 ctxt->sax->characters(ctxt->userData, out, i);
5473 }
5474 } else {
5475 ent = xmlParseEntityRef(ctxt);
5476 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005477 if (!ctxt->wellFormed)
5478 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005479 if ((ent->name != NULL) &&
5480 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5481 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005482 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005483
5484
5485 /*
5486 * The first reference to the entity trigger a parsing phase
5487 * where the ent->children is filled with the result from
5488 * the parsing.
5489 */
5490 if (ent->children == NULL) {
5491 xmlChar *value;
5492 value = ent->content;
5493
5494 /*
5495 * Check that this entity is well formed
5496 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005497 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005498 (value[1] == 0) && (value[0] == '<') &&
5499 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5500 /*
5501 * DONE: get definite answer on this !!!
5502 * Lots of entity decls are used to declare a single
5503 * char
5504 * <!ENTITY lt "<">
5505 * Which seems to be valid since
5506 * 2.4: The ampersand character (&) and the left angle
5507 * bracket (<) may appear in their literal form only
5508 * when used ... They are also legal within the literal
5509 * entity value of an internal entity declaration;i
5510 * see "4.3.2 Well-Formed Parsed Entities".
5511 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5512 * Looking at the OASIS test suite and James Clark
5513 * tests, this is broken. However the XML REC uses
5514 * it. Is the XML REC not well-formed ????
5515 * This is a hack to avoid this problem
5516 *
5517 * ANSWER: since lt gt amp .. are already defined,
5518 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005519 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005520 * is lousy but acceptable.
5521 */
5522 list = xmlNewDocText(ctxt->myDoc, value);
5523 if (list != NULL) {
5524 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5525 (ent->children == NULL)) {
5526 ent->children = list;
5527 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005528 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005529 list->parent = (xmlNodePtr) ent;
5530 } else {
5531 xmlFreeNodeList(list);
5532 }
5533 } else if (list != NULL) {
5534 xmlFreeNodeList(list);
5535 }
5536 } else {
5537 /*
5538 * 4.3.2: An internal general parsed entity is well-formed
5539 * if its replacement text matches the production labeled
5540 * content.
5541 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005542
5543 void *user_data;
5544 /*
5545 * This is a bit hackish but this seems the best
5546 * way to make sure both SAX and DOM entity support
5547 * behaves okay.
5548 */
5549 if (ctxt->userData == ctxt)
5550 user_data = NULL;
5551 else
5552 user_data = ctxt->userData;
5553
Owen Taylor3473f882001-02-23 17:55:21 +00005554 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5555 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005556 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5557 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005558 ctxt->depth--;
5559 } else if (ent->etype ==
5560 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5561 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005562 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005563 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005564 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005565 ctxt->depth--;
5566 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005567 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005568 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5569 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
5571 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005572 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005573 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005574 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005575 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5576 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005577 (ent->children == NULL)) {
5578 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005579 if (ctxt->replaceEntities) {
5580 /*
5581 * Prune it directly in the generated document
5582 * except for single text nodes.
5583 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005584 if (((list->type == XML_TEXT_NODE) &&
5585 (list->next == NULL)) ||
5586 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005587 list->parent = (xmlNodePtr) ent;
5588 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005589 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005590 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005591 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005592 while (list != NULL) {
5593 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005594 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005595 if (list->next == NULL)
5596 ent->last = list;
5597 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005598 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005599 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005600#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005601 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5602 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005603#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005604 }
5605 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005606 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005607 while (list != NULL) {
5608 list->parent = (xmlNodePtr) ent;
5609 if (list->next == NULL)
5610 ent->last = list;
5611 list = list->next;
5612 }
Owen Taylor3473f882001-02-23 17:55:21 +00005613 }
5614 } else {
5615 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005616 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005617 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005618 } else if ((ret != XML_ERR_OK) &&
5619 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005620 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005621 } else if (list != NULL) {
5622 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005623 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005624 }
5625 }
5626 }
5627 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5628 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5629 /*
5630 * Create a node.
5631 */
5632 ctxt->sax->reference(ctxt->userData, ent->name);
5633 return;
5634 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005635 /*
5636 * There is a problem on the handling of _private for entities
5637 * (bug 155816): Should we copy the content of the field from
5638 * the entity (possibly overwriting some value set by the user
5639 * when a copy is created), should we leave it alone, or should
5640 * we try to take care of different situations? The problem
5641 * is exacerbated by the usage of this field by the xmlReader.
5642 * To fix this bug, we look at _private on the created node
5643 * and, if it's NULL, we copy in whatever was in the entity.
5644 * If it's not NULL we leave it alone. This is somewhat of a
5645 * hack - maybe we should have further tests to determine
5646 * what to do.
5647 */
Owen Taylor3473f882001-02-23 17:55:21 +00005648 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5649 /*
5650 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005651 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005652 * In the first occurrence list contains the replacement.
5653 * progressive == 2 means we are operating on the Reader
5654 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005655 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005656 if (((list == NULL) && (ent->owner == 0)) ||
5657 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005658 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005659
5660 /*
5661 * when operating on a reader, the entities definitions
5662 * are always owning the entities subtree.
5663 if (ctxt->parseMode == XML_PARSE_READER)
5664 ent->owner = 1;
5665 */
5666
Daniel Veillard62f313b2001-07-04 19:49:14 +00005667 cur = ent->children;
5668 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005669 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005670 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005671 if (nw->_private == NULL)
5672 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005673 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005674 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005675 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005676 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005677 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005678 if (cur == ent->last) {
5679 /*
5680 * needed to detect some strange empty
5681 * node cases in the reader tests
5682 */
5683 if ((ctxt->parseMode == XML_PARSE_READER) &&
5684 (nw->type == XML_ELEMENT_NODE) &&
5685 (nw->children == NULL))
5686 nw->extra = 1;
5687
Daniel Veillard62f313b2001-07-04 19:49:14 +00005688 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005689 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005690 cur = cur->next;
5691 }
Daniel Veillard81273902003-09-30 00:43:48 +00005692#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005693 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005694 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005695#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005696 } else if (list == NULL) {
5697 xmlNodePtr nw = NULL, cur, next, last,
5698 firstChild = NULL;
5699 /*
5700 * Copy the entity child list and make it the new
5701 * entity child list. The goal is to make sure any
5702 * ID or REF referenced will be the one from the
5703 * document content and not the entity copy.
5704 */
5705 cur = ent->children;
5706 ent->children = NULL;
5707 last = ent->last;
5708 ent->last = NULL;
5709 while (cur != NULL) {
5710 next = cur->next;
5711 cur->next = NULL;
5712 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005713 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005714 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005715 if (nw->_private == NULL)
5716 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005717 if (firstChild == NULL){
5718 firstChild = cur;
5719 }
5720 xmlAddChild((xmlNodePtr) ent, nw);
5721 xmlAddChild(ctxt->node, cur);
5722 }
5723 if (cur == last)
5724 break;
5725 cur = next;
5726 }
5727 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005728#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005729 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5730 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005731#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005732 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005733 const xmlChar *nbktext;
5734
Daniel Veillard62f313b2001-07-04 19:49:14 +00005735 /*
5736 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005737 * node with a possible previous text one which
5738 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005739 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005740 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5741 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005742 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005743 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005744 if ((ent->last != ent->children) &&
5745 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005746 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005747 xmlAddChildList(ctxt->node, ent->children);
5748 }
5749
Owen Taylor3473f882001-02-23 17:55:21 +00005750 /*
5751 * This is to avoid a nasty side effect, see
5752 * characters() in SAX.c
5753 */
5754 ctxt->nodemem = 0;
5755 ctxt->nodelen = 0;
5756 return;
5757 } else {
5758 /*
5759 * Probably running in SAX mode
5760 */
5761 xmlParserInputPtr input;
5762
5763 input = xmlNewEntityInputStream(ctxt, ent);
5764 xmlPushInput(ctxt, input);
5765 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005766 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5767 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005768 xmlParseTextDecl(ctxt);
5769 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5770 /*
5771 * The XML REC instructs us to stop parsing right here
5772 */
5773 ctxt->instate = XML_PARSER_EOF;
5774 return;
5775 }
5776 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005777 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5778 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005779 }
5780 }
5781 return;
5782 }
5783 }
5784 } else {
5785 val = ent->content;
5786 if (val == NULL) return;
5787 /*
5788 * inline the entity.
5789 */
5790 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5791 (!ctxt->disableSAX))
5792 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5793 }
5794 }
5795}
5796
5797/**
5798 * xmlParseEntityRef:
5799 * @ctxt: an XML parser context
5800 *
5801 * parse ENTITY references declarations
5802 *
5803 * [68] EntityRef ::= '&' Name ';'
5804 *
5805 * [ WFC: Entity Declared ]
5806 * In a document without any DTD, a document with only an internal DTD
5807 * subset which contains no parameter entity references, or a document
5808 * with "standalone='yes'", the Name given in the entity reference
5809 * must match that in an entity declaration, except that well-formed
5810 * documents need not declare any of the following entities: amp, lt,
5811 * gt, apos, quot. The declaration of a parameter entity must precede
5812 * any reference to it. Similarly, the declaration of a general entity
5813 * must precede any reference to it which appears in a default value in an
5814 * attribute-list declaration. Note that if entities are declared in the
5815 * external subset or in external parameter entities, a non-validating
5816 * processor is not obligated to read and process their declarations;
5817 * for such documents, the rule that an entity must be declared is a
5818 * well-formedness constraint only if standalone='yes'.
5819 *
5820 * [ WFC: Parsed Entity ]
5821 * An entity reference must not contain the name of an unparsed entity
5822 *
5823 * Returns the xmlEntityPtr if found, or NULL otherwise.
5824 */
5825xmlEntityPtr
5826xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005827 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005828 xmlEntityPtr ent = NULL;
5829
5830 GROW;
5831
5832 if (RAW == '&') {
5833 NEXT;
5834 name = xmlParseName(ctxt);
5835 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005836 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5837 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005838 } else {
5839 if (RAW == ';') {
5840 NEXT;
5841 /*
5842 * Ask first SAX for entity resolution, otherwise try the
5843 * predefined set.
5844 */
5845 if (ctxt->sax != NULL) {
5846 if (ctxt->sax->getEntity != NULL)
5847 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005848 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00005849 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00005850 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
5851 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005852 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005853 }
Owen Taylor3473f882001-02-23 17:55:21 +00005854 }
5855 /*
5856 * [ WFC: Entity Declared ]
5857 * In a document without any DTD, a document with only an
5858 * internal DTD subset which contains no parameter entity
5859 * references, or a document with "standalone='yes'", the
5860 * Name given in the entity reference must match that in an
5861 * entity declaration, except that well-formed documents
5862 * need not declare any of the following entities: amp, lt,
5863 * gt, apos, quot.
5864 * The declaration of a parameter entity must precede any
5865 * reference to it.
5866 * Similarly, the declaration of a general entity must
5867 * precede any reference to it which appears in a default
5868 * value in an attribute-list declaration. Note that if
5869 * entities are declared in the external subset or in
5870 * external parameter entities, a non-validating processor
5871 * is not obligated to read and process their declarations;
5872 * for such documents, the rule that an entity must be
5873 * declared is a well-formedness constraint only if
5874 * standalone='yes'.
5875 */
5876 if (ent == NULL) {
5877 if ((ctxt->standalone == 1) ||
5878 ((ctxt->hasExternalSubset == 0) &&
5879 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005880 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005881 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005882 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005883 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005884 "Entity '%s' not defined\n", name);
5885 }
Daniel Veillardf403d292003-10-05 13:51:35 +00005886 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005887 }
5888
5889 /*
5890 * [ WFC: Parsed Entity ]
5891 * An entity reference must not contain the name of an
5892 * unparsed entity
5893 */
5894 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005895 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00005896 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 }
5898
5899 /*
5900 * [ WFC: No External Entity References ]
5901 * Attribute values cannot contain direct or indirect
5902 * entity references to external entities.
5903 */
5904 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5905 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005906 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
5907 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005908 }
5909 /*
5910 * [ WFC: No < in Attribute Values ]
5911 * The replacement text of any entity referred to directly or
5912 * indirectly in an attribute value (other than "&lt;") must
5913 * not contain a <.
5914 */
5915 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5916 (ent != NULL) &&
5917 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5918 (ent->content != NULL) &&
5919 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005920 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00005921 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005922 }
5923
5924 /*
5925 * Internal check, no parameter entities here ...
5926 */
5927 else {
5928 switch (ent->etype) {
5929 case XML_INTERNAL_PARAMETER_ENTITY:
5930 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005931 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
5932 "Attempt to reference the parameter entity '%s'\n",
5933 name);
Owen Taylor3473f882001-02-23 17:55:21 +00005934 break;
5935 default:
5936 break;
5937 }
5938 }
5939
5940 /*
5941 * [ WFC: No Recursion ]
5942 * A parsed entity must not contain a recursive reference
5943 * to itself, either directly or indirectly.
5944 * Done somewhere else
5945 */
5946
5947 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005948 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005949 }
Owen Taylor3473f882001-02-23 17:55:21 +00005950 }
5951 }
5952 return(ent);
5953}
5954
5955/**
5956 * xmlParseStringEntityRef:
5957 * @ctxt: an XML parser context
5958 * @str: a pointer to an index in the string
5959 *
5960 * parse ENTITY references declarations, but this version parses it from
5961 * a string value.
5962 *
5963 * [68] EntityRef ::= '&' Name ';'
5964 *
5965 * [ WFC: Entity Declared ]
5966 * In a document without any DTD, a document with only an internal DTD
5967 * subset which contains no parameter entity references, or a document
5968 * with "standalone='yes'", the Name given in the entity reference
5969 * must match that in an entity declaration, except that well-formed
5970 * documents need not declare any of the following entities: amp, lt,
5971 * gt, apos, quot. The declaration of a parameter entity must precede
5972 * any reference to it. Similarly, the declaration of a general entity
5973 * must precede any reference to it which appears in a default value in an
5974 * attribute-list declaration. Note that if entities are declared in the
5975 * external subset or in external parameter entities, a non-validating
5976 * processor is not obligated to read and process their declarations;
5977 * for such documents, the rule that an entity must be declared is a
5978 * well-formedness constraint only if standalone='yes'.
5979 *
5980 * [ WFC: Parsed Entity ]
5981 * An entity reference must not contain the name of an unparsed entity
5982 *
5983 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5984 * is updated to the current location in the string.
5985 */
5986xmlEntityPtr
5987xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5988 xmlChar *name;
5989 const xmlChar *ptr;
5990 xmlChar cur;
5991 xmlEntityPtr ent = NULL;
5992
5993 if ((str == NULL) || (*str == NULL))
5994 return(NULL);
5995 ptr = *str;
5996 cur = *ptr;
5997 if (cur == '&') {
5998 ptr++;
5999 cur = *ptr;
6000 name = xmlParseStringName(ctxt, &ptr);
6001 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006002 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6003 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006004 } else {
6005 if (*ptr == ';') {
6006 ptr++;
6007 /*
6008 * Ask first SAX for entity resolution, otherwise try the
6009 * predefined set.
6010 */
6011 if (ctxt->sax != NULL) {
6012 if (ctxt->sax->getEntity != NULL)
6013 ent = ctxt->sax->getEntity(ctxt->userData, name);
6014 if (ent == NULL)
6015 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006016 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006017 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006018 }
Owen Taylor3473f882001-02-23 17:55:21 +00006019 }
6020 /*
6021 * [ WFC: Entity Declared ]
6022 * In a document without any DTD, a document with only an
6023 * internal DTD subset which contains no parameter entity
6024 * references, or a document with "standalone='yes'", the
6025 * Name given in the entity reference must match that in an
6026 * entity declaration, except that well-formed documents
6027 * need not declare any of the following entities: amp, lt,
6028 * gt, apos, quot.
6029 * The declaration of a parameter entity must precede any
6030 * reference to it.
6031 * Similarly, the declaration of a general entity must
6032 * precede any reference to it which appears in a default
6033 * value in an attribute-list declaration. Note that if
6034 * entities are declared in the external subset or in
6035 * external parameter entities, a non-validating processor
6036 * is not obligated to read and process their declarations;
6037 * for such documents, the rule that an entity must be
6038 * declared is a well-formedness constraint only if
6039 * standalone='yes'.
6040 */
6041 if (ent == NULL) {
6042 if ((ctxt->standalone == 1) ||
6043 ((ctxt->hasExternalSubset == 0) &&
6044 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006045 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006046 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006047 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006048 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006049 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006050 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006051 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006052 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006053 }
6054
6055 /*
6056 * [ WFC: Parsed Entity ]
6057 * An entity reference must not contain the name of an
6058 * unparsed entity
6059 */
6060 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006061 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006062 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006063 }
6064
6065 /*
6066 * [ WFC: No External Entity References ]
6067 * Attribute values cannot contain direct or indirect
6068 * entity references to external entities.
6069 */
6070 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6071 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006072 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006073 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006074 }
6075 /*
6076 * [ WFC: No < in Attribute Values ]
6077 * The replacement text of any entity referred to directly or
6078 * indirectly in an attribute value (other than "&lt;") must
6079 * not contain a <.
6080 */
6081 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6082 (ent != NULL) &&
6083 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6084 (ent->content != NULL) &&
6085 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006086 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6087 "'<' in entity '%s' is not allowed in attributes values\n",
6088 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006089 }
6090
6091 /*
6092 * Internal check, no parameter entities here ...
6093 */
6094 else {
6095 switch (ent->etype) {
6096 case XML_INTERNAL_PARAMETER_ENTITY:
6097 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006098 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6099 "Attempt to reference the parameter entity '%s'\n",
6100 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 break;
6102 default:
6103 break;
6104 }
6105 }
6106
6107 /*
6108 * [ WFC: No Recursion ]
6109 * A parsed entity must not contain a recursive reference
6110 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006111 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006112 */
6113
6114 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006115 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006116 }
6117 xmlFree(name);
6118 }
6119 }
6120 *str = ptr;
6121 return(ent);
6122}
6123
6124/**
6125 * xmlParsePEReference:
6126 * @ctxt: an XML parser context
6127 *
6128 * parse PEReference declarations
6129 * The entity content is handled directly by pushing it's content as
6130 * a new input stream.
6131 *
6132 * [69] PEReference ::= '%' Name ';'
6133 *
6134 * [ WFC: No Recursion ]
6135 * A parsed entity must not contain a recursive
6136 * reference to itself, either directly or indirectly.
6137 *
6138 * [ WFC: Entity Declared ]
6139 * In a document without any DTD, a document with only an internal DTD
6140 * subset which contains no parameter entity references, or a document
6141 * with "standalone='yes'", ... ... The declaration of a parameter
6142 * entity must precede any reference to it...
6143 *
6144 * [ VC: Entity Declared ]
6145 * In a document with an external subset or external parameter entities
6146 * with "standalone='no'", ... ... The declaration of a parameter entity
6147 * must precede any reference to it...
6148 *
6149 * [ WFC: In DTD ]
6150 * Parameter-entity references may only appear in the DTD.
6151 * NOTE: misleading but this is handled.
6152 */
6153void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006154xmlParsePEReference(xmlParserCtxtPtr ctxt)
6155{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006156 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006157 xmlEntityPtr entity = NULL;
6158 xmlParserInputPtr input;
6159
6160 if (RAW == '%') {
6161 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006162 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006163 if (name == NULL) {
6164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6165 "xmlParsePEReference: no name\n");
6166 } else {
6167 if (RAW == ';') {
6168 NEXT;
6169 if ((ctxt->sax != NULL) &&
6170 (ctxt->sax->getParameterEntity != NULL))
6171 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6172 name);
6173 if (entity == NULL) {
6174 /*
6175 * [ WFC: Entity Declared ]
6176 * In a document without any DTD, a document with only an
6177 * internal DTD subset which contains no parameter entity
6178 * references, or a document with "standalone='yes'", ...
6179 * ... The declaration of a parameter entity must precede
6180 * any reference to it...
6181 */
6182 if ((ctxt->standalone == 1) ||
6183 ((ctxt->hasExternalSubset == 0) &&
6184 (ctxt->hasPErefs == 0))) {
6185 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6186 "PEReference: %%%s; not found\n",
6187 name);
6188 } else {
6189 /*
6190 * [ VC: Entity Declared ]
6191 * In a document with an external subset or external
6192 * parameter entities with "standalone='no'", ...
6193 * ... The declaration of a parameter entity must
6194 * precede any reference to it...
6195 */
6196 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6197 "PEReference: %%%s; not found\n",
6198 name, NULL);
6199 ctxt->valid = 0;
6200 }
6201 } else {
6202 /*
6203 * Internal checking in case the entity quest barfed
6204 */
6205 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6206 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6207 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6208 "Internal: %%%s; is not a parameter entity\n",
6209 name, NULL);
6210 } else if (ctxt->input->free != deallocblankswrapper) {
6211 input =
6212 xmlNewBlanksWrapperInputStream(ctxt, entity);
6213 xmlPushInput(ctxt, input);
6214 } else {
6215 /*
6216 * TODO !!!
6217 * handle the extra spaces added before and after
6218 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6219 */
6220 input = xmlNewEntityInputStream(ctxt, entity);
6221 xmlPushInput(ctxt, input);
6222 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006223 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006224 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006225 xmlParseTextDecl(ctxt);
6226 if (ctxt->errNo ==
6227 XML_ERR_UNSUPPORTED_ENCODING) {
6228 /*
6229 * The XML REC instructs us to stop parsing
6230 * right here
6231 */
6232 ctxt->instate = XML_PARSER_EOF;
6233 return;
6234 }
6235 }
6236 }
6237 }
6238 ctxt->hasPErefs = 1;
6239 } else {
6240 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6241 }
6242 }
Owen Taylor3473f882001-02-23 17:55:21 +00006243 }
6244}
6245
6246/**
6247 * xmlParseStringPEReference:
6248 * @ctxt: an XML parser context
6249 * @str: a pointer to an index in the string
6250 *
6251 * parse PEReference declarations
6252 *
6253 * [69] PEReference ::= '%' Name ';'
6254 *
6255 * [ WFC: No Recursion ]
6256 * A parsed entity must not contain a recursive
6257 * reference to itself, either directly or indirectly.
6258 *
6259 * [ WFC: Entity Declared ]
6260 * In a document without any DTD, a document with only an internal DTD
6261 * subset which contains no parameter entity references, or a document
6262 * with "standalone='yes'", ... ... The declaration of a parameter
6263 * entity must precede any reference to it...
6264 *
6265 * [ VC: Entity Declared ]
6266 * In a document with an external subset or external parameter entities
6267 * with "standalone='no'", ... ... The declaration of a parameter entity
6268 * must precede any reference to it...
6269 *
6270 * [ WFC: In DTD ]
6271 * Parameter-entity references may only appear in the DTD.
6272 * NOTE: misleading but this is handled.
6273 *
6274 * Returns the string of the entity content.
6275 * str is updated to the current value of the index
6276 */
6277xmlEntityPtr
6278xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6279 const xmlChar *ptr;
6280 xmlChar cur;
6281 xmlChar *name;
6282 xmlEntityPtr entity = NULL;
6283
6284 if ((str == NULL) || (*str == NULL)) return(NULL);
6285 ptr = *str;
6286 cur = *ptr;
6287 if (cur == '%') {
6288 ptr++;
6289 cur = *ptr;
6290 name = xmlParseStringName(ctxt, &ptr);
6291 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006292 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6293 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006294 } else {
6295 cur = *ptr;
6296 if (cur == ';') {
6297 ptr++;
6298 cur = *ptr;
6299 if ((ctxt->sax != NULL) &&
6300 (ctxt->sax->getParameterEntity != NULL))
6301 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6302 name);
6303 if (entity == NULL) {
6304 /*
6305 * [ WFC: Entity Declared ]
6306 * In a document without any DTD, a document with only an
6307 * internal DTD subset which contains no parameter entity
6308 * references, or a document with "standalone='yes'", ...
6309 * ... The declaration of a parameter entity must precede
6310 * any reference to it...
6311 */
6312 if ((ctxt->standalone == 1) ||
6313 ((ctxt->hasExternalSubset == 0) &&
6314 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006315 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006316 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 } else {
6318 /*
6319 * [ VC: Entity Declared ]
6320 * In a document with an external subset or external
6321 * parameter entities with "standalone='no'", ...
6322 * ... The declaration of a parameter entity must
6323 * precede any reference to it...
6324 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006325 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6326 "PEReference: %%%s; not found\n",
6327 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 ctxt->valid = 0;
6329 }
6330 } else {
6331 /*
6332 * Internal checking in case the entity quest barfed
6333 */
6334 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6335 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006336 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6337 "%%%s; is not a parameter entity\n",
6338 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 }
6340 }
6341 ctxt->hasPErefs = 1;
6342 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006343 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006344 }
6345 xmlFree(name);
6346 }
6347 }
6348 *str = ptr;
6349 return(entity);
6350}
6351
6352/**
6353 * xmlParseDocTypeDecl:
6354 * @ctxt: an XML parser context
6355 *
6356 * parse a DOCTYPE declaration
6357 *
6358 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6359 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6360 *
6361 * [ VC: Root Element Type ]
6362 * The Name in the document type declaration must match the element
6363 * type of the root element.
6364 */
6365
6366void
6367xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006368 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006369 xmlChar *ExternalID = NULL;
6370 xmlChar *URI = NULL;
6371
6372 /*
6373 * We know that '<!DOCTYPE' has been detected.
6374 */
6375 SKIP(9);
6376
6377 SKIP_BLANKS;
6378
6379 /*
6380 * Parse the DOCTYPE name.
6381 */
6382 name = xmlParseName(ctxt);
6383 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006384 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6385 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006386 }
6387 ctxt->intSubName = name;
6388
6389 SKIP_BLANKS;
6390
6391 /*
6392 * Check for SystemID and ExternalID
6393 */
6394 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6395
6396 if ((URI != NULL) || (ExternalID != NULL)) {
6397 ctxt->hasExternalSubset = 1;
6398 }
6399 ctxt->extSubURI = URI;
6400 ctxt->extSubSystem = ExternalID;
6401
6402 SKIP_BLANKS;
6403
6404 /*
6405 * Create and update the internal subset.
6406 */
6407 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6408 (!ctxt->disableSAX))
6409 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6410
6411 /*
6412 * Is there any internal subset declarations ?
6413 * they are handled separately in xmlParseInternalSubset()
6414 */
6415 if (RAW == '[')
6416 return;
6417
6418 /*
6419 * We should be at the end of the DOCTYPE declaration.
6420 */
6421 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006422 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006423 }
6424 NEXT;
6425}
6426
6427/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006428 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006429 * @ctxt: an XML parser context
6430 *
6431 * parse the internal subset declaration
6432 *
6433 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6434 */
6435
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006436static void
Owen Taylor3473f882001-02-23 17:55:21 +00006437xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6438 /*
6439 * Is there any DTD definition ?
6440 */
6441 if (RAW == '[') {
6442 ctxt->instate = XML_PARSER_DTD;
6443 NEXT;
6444 /*
6445 * Parse the succession of Markup declarations and
6446 * PEReferences.
6447 * Subsequence (markupdecl | PEReference | S)*
6448 */
6449 while (RAW != ']') {
6450 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006451 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006452
6453 SKIP_BLANKS;
6454 xmlParseMarkupDecl(ctxt);
6455 xmlParsePEReference(ctxt);
6456
6457 /*
6458 * Pop-up of finished entities.
6459 */
6460 while ((RAW == 0) && (ctxt->inputNr > 1))
6461 xmlPopInput(ctxt);
6462
6463 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006464 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006465 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006466 break;
6467 }
6468 }
6469 if (RAW == ']') {
6470 NEXT;
6471 SKIP_BLANKS;
6472 }
6473 }
6474
6475 /*
6476 * We should be at the end of the DOCTYPE declaration.
6477 */
6478 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006479 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 }
6481 NEXT;
6482}
6483
Daniel Veillard81273902003-09-30 00:43:48 +00006484#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006485/**
6486 * xmlParseAttribute:
6487 * @ctxt: an XML parser context
6488 * @value: a xmlChar ** used to store the value of the attribute
6489 *
6490 * parse an attribute
6491 *
6492 * [41] Attribute ::= Name Eq AttValue
6493 *
6494 * [ WFC: No External Entity References ]
6495 * Attribute values cannot contain direct or indirect entity references
6496 * to external entities.
6497 *
6498 * [ WFC: No < in Attribute Values ]
6499 * The replacement text of any entity referred to directly or indirectly in
6500 * an attribute value (other than "&lt;") must not contain a <.
6501 *
6502 * [ VC: Attribute Value Type ]
6503 * The attribute must have been declared; the value must be of the type
6504 * declared for it.
6505 *
6506 * [25] Eq ::= S? '=' S?
6507 *
6508 * With namespace:
6509 *
6510 * [NS 11] Attribute ::= QName Eq AttValue
6511 *
6512 * Also the case QName == xmlns:??? is handled independently as a namespace
6513 * definition.
6514 *
6515 * Returns the attribute name, and the value in *value.
6516 */
6517
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006518const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006519xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006520 const xmlChar *name;
6521 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006522
6523 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006524 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 name = xmlParseName(ctxt);
6526 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006527 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006528 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006529 return(NULL);
6530 }
6531
6532 /*
6533 * read the value
6534 */
6535 SKIP_BLANKS;
6536 if (RAW == '=') {
6537 NEXT;
6538 SKIP_BLANKS;
6539 val = xmlParseAttValue(ctxt);
6540 ctxt->instate = XML_PARSER_CONTENT;
6541 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006542 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006543 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006544 return(NULL);
6545 }
6546
6547 /*
6548 * Check that xml:lang conforms to the specification
6549 * No more registered as an error, just generate a warning now
6550 * since this was deprecated in XML second edition
6551 */
6552 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6553 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006554 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6555 "Malformed value for xml:lang : %s\n",
6556 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006557 }
6558 }
6559
6560 /*
6561 * Check that xml:space conforms to the specification
6562 */
6563 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6564 if (xmlStrEqual(val, BAD_CAST "default"))
6565 *(ctxt->space) = 0;
6566 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6567 *(ctxt->space) = 1;
6568 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006569 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006570"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006571 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006572 }
6573 }
6574
6575 *value = val;
6576 return(name);
6577}
6578
6579/**
6580 * xmlParseStartTag:
6581 * @ctxt: an XML parser context
6582 *
6583 * parse a start of tag either for rule element or
6584 * EmptyElement. In both case we don't parse the tag closing chars.
6585 *
6586 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6587 *
6588 * [ WFC: Unique Att Spec ]
6589 * No attribute name may appear more than once in the same start-tag or
6590 * empty-element tag.
6591 *
6592 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6593 *
6594 * [ WFC: Unique Att Spec ]
6595 * No attribute name may appear more than once in the same start-tag or
6596 * empty-element tag.
6597 *
6598 * With namespace:
6599 *
6600 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6601 *
6602 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6603 *
6604 * Returns the element name parsed
6605 */
6606
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006607const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006608xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006609 const xmlChar *name;
6610 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006611 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006612 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006613 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006614 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006615 int i;
6616
6617 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006618 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006619
6620 name = xmlParseName(ctxt);
6621 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006622 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006623 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006624 return(NULL);
6625 }
6626
6627 /*
6628 * Now parse the attributes, it ends up with the ending
6629 *
6630 * (S Attribute)* S?
6631 */
6632 SKIP_BLANKS;
6633 GROW;
6634
Daniel Veillard21a0f912001-02-25 19:54:14 +00006635 while ((RAW != '>') &&
6636 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006637 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006638 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006639 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006640
6641 attname = xmlParseAttribute(ctxt, &attvalue);
6642 if ((attname != NULL) && (attvalue != NULL)) {
6643 /*
6644 * [ WFC: Unique Att Spec ]
6645 * No attribute name may appear more than once in the same
6646 * start-tag or empty-element tag.
6647 */
6648 for (i = 0; i < nbatts;i += 2) {
6649 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006650 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006651 xmlFree(attvalue);
6652 goto failed;
6653 }
6654 }
Owen Taylor3473f882001-02-23 17:55:21 +00006655 /*
6656 * Add the pair to atts
6657 */
6658 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006659 maxatts = 22; /* allow for 10 attrs by default */
6660 atts = (const xmlChar **)
6661 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006662 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006663 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006664 if (attvalue != NULL)
6665 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006666 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006667 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006668 ctxt->atts = atts;
6669 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006670 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006671 const xmlChar **n;
6672
Owen Taylor3473f882001-02-23 17:55:21 +00006673 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006674 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006675 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006676 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006677 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006678 if (attvalue != NULL)
6679 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006680 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006681 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006682 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006683 ctxt->atts = atts;
6684 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006685 }
6686 atts[nbatts++] = attname;
6687 atts[nbatts++] = attvalue;
6688 atts[nbatts] = NULL;
6689 atts[nbatts + 1] = NULL;
6690 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006691 if (attvalue != NULL)
6692 xmlFree(attvalue);
6693 }
6694
6695failed:
6696
Daniel Veillard3772de32002-12-17 10:31:45 +00006697 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006698 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6699 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006700 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006701 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6702 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006703 }
6704 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006705 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6706 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006707 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6708 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006709 break;
6710 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006711 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006712 GROW;
6713 }
6714
6715 /*
6716 * SAX: Start of Element !
6717 */
6718 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006719 (!ctxt->disableSAX)) {
6720 if (nbatts > 0)
6721 ctxt->sax->startElement(ctxt->userData, name, atts);
6722 else
6723 ctxt->sax->startElement(ctxt->userData, name, NULL);
6724 }
Owen Taylor3473f882001-02-23 17:55:21 +00006725
6726 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006727 /* Free only the content strings */
6728 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006729 if (atts[i] != NULL)
6730 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006731 }
6732 return(name);
6733}
6734
6735/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006736 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006737 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006738 * @line: line of the start tag
6739 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006740 *
6741 * parse an end of tag
6742 *
6743 * [42] ETag ::= '</' Name S? '>'
6744 *
6745 * With namespace
6746 *
6747 * [NS 9] ETag ::= '</' QName S? '>'
6748 */
6749
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006750static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006751xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006752 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006753
6754 GROW;
6755 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006756 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006757 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006758 return;
6759 }
6760 SKIP(2);
6761
Daniel Veillard46de64e2002-05-29 08:21:33 +00006762 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006763
6764 /*
6765 * We should definitely be at the ending "S? '>'" part
6766 */
6767 GROW;
6768 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006769 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006770 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006771 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006772 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006773
6774 /*
6775 * [ WFC: Element Type Match ]
6776 * The Name in an element's end-tag must match the element type in the
6777 * start-tag.
6778 *
6779 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006780 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006781 if (name == NULL) name = BAD_CAST "unparseable";
6782 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006783 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006784 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006785 }
6786
6787 /*
6788 * SAX: End of Tag
6789 */
6790 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6791 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006792 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006793
Daniel Veillarde57ec792003-09-10 10:50:59 +00006794 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006795 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006796 return;
6797}
6798
6799/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006800 * xmlParseEndTag:
6801 * @ctxt: an XML parser context
6802 *
6803 * parse an end of tag
6804 *
6805 * [42] ETag ::= '</' Name S? '>'
6806 *
6807 * With namespace
6808 *
6809 * [NS 9] ETag ::= '</' QName S? '>'
6810 */
6811
6812void
6813xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006814 xmlParseEndTag1(ctxt, 0);
6815}
Daniel Veillard81273902003-09-30 00:43:48 +00006816#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006817
6818/************************************************************************
6819 * *
6820 * SAX 2 specific operations *
6821 * *
6822 ************************************************************************/
6823
6824static const xmlChar *
6825xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
6826 int len = 0, l;
6827 int c;
6828 int count = 0;
6829
6830 /*
6831 * Handler for more complex cases
6832 */
6833 GROW;
6834 c = CUR_CHAR(l);
6835 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006836 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006837 return(NULL);
6838 }
6839
6840 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00006841 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006842 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00006843 (IS_COMBINING(c)) ||
6844 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006845 if (count++ > 100) {
6846 count = 0;
6847 GROW;
6848 }
6849 len += l;
6850 NEXTL(l);
6851 c = CUR_CHAR(l);
6852 }
6853 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
6854}
6855
6856/*
6857 * xmlGetNamespace:
6858 * @ctxt: an XML parser context
6859 * @prefix: the prefix to lookup
6860 *
6861 * Lookup the namespace name for the @prefix (which ca be NULL)
6862 * The prefix must come from the @ctxt->dict dictionnary
6863 *
6864 * Returns the namespace name or NULL if not bound
6865 */
6866static const xmlChar *
6867xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
6868 int i;
6869
Daniel Veillarde57ec792003-09-10 10:50:59 +00006870 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006871 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00006872 if (ctxt->nsTab[i] == prefix) {
6873 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
6874 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006875 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006876 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006877 return(NULL);
6878}
6879
6880/**
6881 * xmlParseNCName:
6882 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00006883 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00006884 *
6885 * parse an XML name.
6886 *
6887 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
6888 * CombiningChar | Extender
6889 *
6890 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
6891 *
6892 * Returns the Name parsed or NULL
6893 */
6894
6895static const xmlChar *
6896xmlParseNCName(xmlParserCtxtPtr ctxt) {
6897 const xmlChar *in;
6898 const xmlChar *ret;
6899 int count = 0;
6900
6901 /*
6902 * Accelerator for simple ASCII names
6903 */
6904 in = ctxt->input->cur;
6905 if (((*in >= 0x61) && (*in <= 0x7A)) ||
6906 ((*in >= 0x41) && (*in <= 0x5A)) ||
6907 (*in == '_')) {
6908 in++;
6909 while (((*in >= 0x61) && (*in <= 0x7A)) ||
6910 ((*in >= 0x41) && (*in <= 0x5A)) ||
6911 ((*in >= 0x30) && (*in <= 0x39)) ||
6912 (*in == '_') || (*in == '-') ||
6913 (*in == '.'))
6914 in++;
6915 if ((*in > 0) && (*in < 0x80)) {
6916 count = in - ctxt->input->cur;
6917 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
6918 ctxt->input->cur = in;
6919 ctxt->nbChars += count;
6920 ctxt->input->col += count;
6921 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006922 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006923 }
6924 return(ret);
6925 }
6926 }
6927 return(xmlParseNCNameComplex(ctxt));
6928}
6929
6930/**
6931 * xmlParseQName:
6932 * @ctxt: an XML parser context
6933 * @prefix: pointer to store the prefix part
6934 *
6935 * parse an XML Namespace QName
6936 *
6937 * [6] QName ::= (Prefix ':')? LocalPart
6938 * [7] Prefix ::= NCName
6939 * [8] LocalPart ::= NCName
6940 *
6941 * Returns the Name parsed or NULL
6942 */
6943
6944static const xmlChar *
6945xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
6946 const xmlChar *l, *p;
6947
6948 GROW;
6949
6950 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006951 if (l == NULL) {
6952 if (CUR == ':') {
6953 l = xmlParseName(ctxt);
6954 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006955 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6956 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006957 *prefix = NULL;
6958 return(l);
6959 }
6960 }
6961 return(NULL);
6962 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00006963 if (CUR == ':') {
6964 NEXT;
6965 p = l;
6966 l = xmlParseNCName(ctxt);
6967 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006968 xmlChar *tmp;
6969
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006970 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6971 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006972 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
6973 p = xmlDictLookup(ctxt->dict, tmp, -1);
6974 if (tmp != NULL) xmlFree(tmp);
6975 *prefix = NULL;
6976 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006977 }
6978 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006979 xmlChar *tmp;
6980
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006981 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
6982 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00006983 NEXT;
6984 tmp = (xmlChar *) xmlParseName(ctxt);
6985 if (tmp != NULL) {
6986 tmp = xmlBuildQName(tmp, l, NULL, 0);
6987 l = xmlDictLookup(ctxt->dict, tmp, -1);
6988 if (tmp != NULL) xmlFree(tmp);
6989 *prefix = p;
6990 return(l);
6991 }
6992 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
6993 l = xmlDictLookup(ctxt->dict, tmp, -1);
6994 if (tmp != NULL) xmlFree(tmp);
6995 *prefix = p;
6996 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00006997 }
6998 *prefix = p;
6999 } else
7000 *prefix = NULL;
7001 return(l);
7002}
7003
7004/**
7005 * xmlParseQNameAndCompare:
7006 * @ctxt: an XML parser context
7007 * @name: the localname
7008 * @prefix: the prefix, if any.
7009 *
7010 * parse an XML name and compares for match
7011 * (specialized for endtag parsing)
7012 *
7013 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7014 * and the name for mismatch
7015 */
7016
7017static const xmlChar *
7018xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7019 xmlChar const *prefix) {
7020 const xmlChar *cmp = name;
7021 const xmlChar *in;
7022 const xmlChar *ret;
7023 const xmlChar *prefix2;
7024
7025 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7026
7027 GROW;
7028 in = ctxt->input->cur;
7029
7030 cmp = prefix;
7031 while (*in != 0 && *in == *cmp) {
7032 ++in;
7033 ++cmp;
7034 }
7035 if ((*cmp == 0) && (*in == ':')) {
7036 in++;
7037 cmp = name;
7038 while (*in != 0 && *in == *cmp) {
7039 ++in;
7040 ++cmp;
7041 }
William M. Brack76e95df2003-10-18 16:20:14 +00007042 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007043 /* success */
7044 ctxt->input->cur = in;
7045 return((const xmlChar*) 1);
7046 }
7047 }
7048 /*
7049 * all strings coms from the dictionary, equality can be done directly
7050 */
7051 ret = xmlParseQName (ctxt, &prefix2);
7052 if ((ret == name) && (prefix == prefix2))
7053 return((const xmlChar*) 1);
7054 return ret;
7055}
7056
7057/**
7058 * xmlParseAttValueInternal:
7059 * @ctxt: an XML parser context
7060 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007061 * @alloc: whether the attribute was reallocated as a new string
7062 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007063 *
7064 * parse a value for an attribute.
7065 * NOTE: if no normalization is needed, the routine will return pointers
7066 * directly from the data buffer.
7067 *
7068 * 3.3.3 Attribute-Value Normalization:
7069 * Before the value of an attribute is passed to the application or
7070 * checked for validity, the XML processor must normalize it as follows:
7071 * - a character reference is processed by appending the referenced
7072 * character to the attribute value
7073 * - an entity reference is processed by recursively processing the
7074 * replacement text of the entity
7075 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7076 * appending #x20 to the normalized value, except that only a single
7077 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7078 * parsed entity or the literal entity value of an internal parsed entity
7079 * - other characters are processed by appending them to the normalized value
7080 * If the declared value is not CDATA, then the XML processor must further
7081 * process the normalized attribute value by discarding any leading and
7082 * trailing space (#x20) characters, and by replacing sequences of space
7083 * (#x20) characters by a single space (#x20) character.
7084 * All attributes for which no declaration has been read should be treated
7085 * by a non-validating parser as if declared CDATA.
7086 *
7087 * Returns the AttValue parsed or NULL. The value has to be freed by the
7088 * caller if it was copied, this can be detected by val[*len] == 0.
7089 */
7090
7091static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007092xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7093 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007094{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007095 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007096 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007097 xmlChar *ret = NULL;
7098
7099 GROW;
7100 in = (xmlChar *) CUR_PTR;
7101 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007102 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007103 return (NULL);
7104 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007105 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007106
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007107 /*
7108 * try to handle in this routine the most common case where no
7109 * allocation of a new string is required and where content is
7110 * pure ASCII.
7111 */
7112 limit = *in++;
7113 end = ctxt->input->end;
7114 start = in;
7115 if (in >= end) {
7116 const xmlChar *oldbase = ctxt->input->base;
7117 GROW;
7118 if (oldbase != ctxt->input->base) {
7119 long delta = ctxt->input->base - oldbase;
7120 start = start + delta;
7121 in = in + delta;
7122 }
7123 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007124 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007125 if (normalize) {
7126 /*
7127 * Skip any leading spaces
7128 */
7129 while ((in < end) && (*in != limit) &&
7130 ((*in == 0x20) || (*in == 0x9) ||
7131 (*in == 0xA) || (*in == 0xD))) {
7132 in++;
7133 start = in;
7134 if (in >= end) {
7135 const xmlChar *oldbase = ctxt->input->base;
7136 GROW;
7137 if (oldbase != ctxt->input->base) {
7138 long delta = ctxt->input->base - oldbase;
7139 start = start + delta;
7140 in = in + delta;
7141 }
7142 end = ctxt->input->end;
7143 }
7144 }
7145 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7146 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7147 if ((*in++ == 0x20) && (*in == 0x20)) break;
7148 if (in >= end) {
7149 const xmlChar *oldbase = ctxt->input->base;
7150 GROW;
7151 if (oldbase != ctxt->input->base) {
7152 long delta = ctxt->input->base - oldbase;
7153 start = start + delta;
7154 in = in + delta;
7155 }
7156 end = ctxt->input->end;
7157 }
7158 }
7159 last = in;
7160 /*
7161 * skip the trailing blanks
7162 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007163 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007164 while ((in < end) && (*in != limit) &&
7165 ((*in == 0x20) || (*in == 0x9) ||
7166 (*in == 0xA) || (*in == 0xD))) {
7167 in++;
7168 if (in >= end) {
7169 const xmlChar *oldbase = ctxt->input->base;
7170 GROW;
7171 if (oldbase != ctxt->input->base) {
7172 long delta = ctxt->input->base - oldbase;
7173 start = start + delta;
7174 in = in + delta;
7175 last = last + delta;
7176 }
7177 end = ctxt->input->end;
7178 }
7179 }
7180 if (*in != limit) goto need_complex;
7181 } else {
7182 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7183 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7184 in++;
7185 if (in >= end) {
7186 const xmlChar *oldbase = ctxt->input->base;
7187 GROW;
7188 if (oldbase != ctxt->input->base) {
7189 long delta = ctxt->input->base - oldbase;
7190 start = start + delta;
7191 in = in + delta;
7192 }
7193 end = ctxt->input->end;
7194 }
7195 }
7196 last = in;
7197 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007198 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007199 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007200 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007201 *len = last - start;
7202 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007203 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007204 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007205 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007206 }
7207 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007208 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007209 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007210need_complex:
7211 if (alloc) *alloc = 1;
7212 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007213}
7214
7215/**
7216 * xmlParseAttribute2:
7217 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007218 * @pref: the element prefix
7219 * @elem: the element name
7220 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007221 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007222 * @len: an int * to save the length of the attribute
7223 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007224 *
7225 * parse an attribute in the new SAX2 framework.
7226 *
7227 * Returns the attribute name, and the value in *value, .
7228 */
7229
7230static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007231xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7232 const xmlChar *pref, const xmlChar *elem,
7233 const xmlChar **prefix, xmlChar **value,
7234 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007235 const xmlChar *name;
7236 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007237 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007238
7239 *value = NULL;
7240 GROW;
7241 name = xmlParseQName(ctxt, prefix);
7242 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007243 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7244 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007245 return(NULL);
7246 }
7247
7248 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007249 * get the type if needed
7250 */
7251 if (ctxt->attsSpecial != NULL) {
7252 int type;
7253
7254 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7255 pref, elem, *prefix, name);
7256 if (type != 0) normalize = 1;
7257 }
7258
7259 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007260 * read the value
7261 */
7262 SKIP_BLANKS;
7263 if (RAW == '=') {
7264 NEXT;
7265 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007266 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007267 ctxt->instate = XML_PARSER_CONTENT;
7268 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007269 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007270 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007271 return(NULL);
7272 }
7273
7274 /*
7275 * Check that xml:lang conforms to the specification
7276 * No more registered as an error, just generate a warning now
7277 * since this was deprecated in XML second edition
7278 */
7279 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7280 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007281 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7282 "Malformed value for xml:lang : %s\n",
7283 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007284 }
7285 }
7286
7287 /*
7288 * Check that xml:space conforms to the specification
7289 */
7290 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7291 if (xmlStrEqual(val, BAD_CAST "default"))
7292 *(ctxt->space) = 0;
7293 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7294 *(ctxt->space) = 1;
7295 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007296 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007297"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7298 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007299 }
7300 }
7301
7302 *value = val;
7303 return(name);
7304}
7305
7306/**
7307 * xmlParseStartTag2:
7308 * @ctxt: an XML parser context
7309 *
7310 * parse a start of tag either for rule element or
7311 * EmptyElement. In both case we don't parse the tag closing chars.
7312 * This routine is called when running SAX2 parsing
7313 *
7314 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7315 *
7316 * [ WFC: Unique Att Spec ]
7317 * No attribute name may appear more than once in the same start-tag or
7318 * empty-element tag.
7319 *
7320 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7321 *
7322 * [ WFC: Unique Att Spec ]
7323 * No attribute name may appear more than once in the same start-tag or
7324 * empty-element tag.
7325 *
7326 * With namespace:
7327 *
7328 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7329 *
7330 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7331 *
7332 * Returns the element name parsed
7333 */
7334
7335static const xmlChar *
7336xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007337 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007338 const xmlChar *localname;
7339 const xmlChar *prefix;
7340 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007341 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007342 const xmlChar *nsname;
7343 xmlChar *attvalue;
7344 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007345 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007346 int nratts, nbatts, nbdef;
7347 int i, j, nbNs, attval;
7348 const xmlChar *base;
7349 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007350
7351 if (RAW != '<') return(NULL);
7352 NEXT1;
7353
7354 /*
7355 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7356 * point since the attribute values may be stored as pointers to
7357 * the buffer and calling SHRINK would destroy them !
7358 * The Shrinking is only possible once the full set of attribute
7359 * callbacks have been done.
7360 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007361reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007362 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007363 base = ctxt->input->base;
7364 cur = ctxt->input->cur - ctxt->input->base;
7365 nbatts = 0;
7366 nratts = 0;
7367 nbdef = 0;
7368 nbNs = 0;
7369 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007370
7371 localname = xmlParseQName(ctxt, &prefix);
7372 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007373 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7374 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007375 return(NULL);
7376 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007377 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007378
7379 /*
7380 * Now parse the attributes, it ends up with the ending
7381 *
7382 * (S Attribute)* S?
7383 */
7384 SKIP_BLANKS;
7385 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007386 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007387
7388 while ((RAW != '>') &&
7389 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007390 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007391 const xmlChar *q = CUR_PTR;
7392 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007393 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007394
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007395 attname = xmlParseAttribute2(ctxt, prefix, localname,
7396 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007397 if ((attname != NULL) && (attvalue != NULL)) {
7398 if (len < 0) len = xmlStrlen(attvalue);
7399 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007400 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7401 xmlURIPtr uri;
7402
7403 if (*URL != 0) {
7404 uri = xmlParseURI((const char *) URL);
7405 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007406 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7407 "xmlns: %s not a valid URI\n",
7408 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007409 } else {
7410 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007411 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7412 "xmlns: URI %s is not absolute\n",
7413 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007414 }
7415 xmlFreeURI(uri);
7416 }
7417 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007418 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007419 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007420 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007421 for (j = 1;j <= nbNs;j++)
7422 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7423 break;
7424 if (j <= nbNs)
7425 xmlErrAttributeDup(ctxt, NULL, attname);
7426 else
7427 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007428 if (alloc != 0) xmlFree(attvalue);
7429 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007430 continue;
7431 }
7432 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007433 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7434 xmlURIPtr uri;
7435
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007436 if (attname == ctxt->str_xml) {
7437 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007438 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7439 "xml namespace prefix mapped to wrong URI\n",
7440 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007441 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007442 /*
7443 * Do not keep a namespace definition node
7444 */
7445 if (alloc != 0) xmlFree(attvalue);
7446 SKIP_BLANKS;
7447 continue;
7448 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007449 uri = xmlParseURI((const char *) URL);
7450 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007451 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7452 "xmlns:%s: '%s' is not a valid URI\n",
7453 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007454 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007455 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007456 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7457 "xmlns:%s: URI %s is not absolute\n",
7458 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007459 }
7460 xmlFreeURI(uri);
7461 }
7462
Daniel Veillard0fb18932003-09-07 09:14:37 +00007463 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007464 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007465 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007466 for (j = 1;j <= nbNs;j++)
7467 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7468 break;
7469 if (j <= nbNs)
7470 xmlErrAttributeDup(ctxt, aprefix, attname);
7471 else
7472 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007473 if (alloc != 0) xmlFree(attvalue);
7474 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007475 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476 continue;
7477 }
7478
7479 /*
7480 * Add the pair to atts
7481 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007482 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7483 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007484 if (attvalue[len] == 0)
7485 xmlFree(attvalue);
7486 goto failed;
7487 }
7488 maxatts = ctxt->maxatts;
7489 atts = ctxt->atts;
7490 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007491 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007492 atts[nbatts++] = attname;
7493 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007494 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007495 atts[nbatts++] = attvalue;
7496 attvalue += len;
7497 atts[nbatts++] = attvalue;
7498 /*
7499 * tag if some deallocation is needed
7500 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007501 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007502 } else {
7503 if ((attvalue != NULL) && (attvalue[len] == 0))
7504 xmlFree(attvalue);
7505 }
7506
7507failed:
7508
7509 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007510 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007511 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7512 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007513 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7515 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007516 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007517 }
7518 SKIP_BLANKS;
7519 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7520 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007521 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007522 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007523 break;
7524 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007525 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007526 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007527 }
7528
Daniel Veillard0fb18932003-09-07 09:14:37 +00007529 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007530 * The attributes defaulting
7531 */
7532 if (ctxt->attsDefault != NULL) {
7533 xmlDefAttrsPtr defaults;
7534
7535 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7536 if (defaults != NULL) {
7537 for (i = 0;i < defaults->nbAttrs;i++) {
7538 attname = defaults->values[4 * i];
7539 aprefix = defaults->values[4 * i + 1];
7540
7541 /*
7542 * special work for namespaces defaulted defs
7543 */
7544 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7545 /*
7546 * check that it's not a defined namespace
7547 */
7548 for (j = 1;j <= nbNs;j++)
7549 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7550 break;
7551 if (j <= nbNs) continue;
7552
7553 nsname = xmlGetNamespace(ctxt, NULL);
7554 if (nsname != defaults->values[4 * i + 2]) {
7555 if (nsPush(ctxt, NULL,
7556 defaults->values[4 * i + 2]) > 0)
7557 nbNs++;
7558 }
7559 } else if (aprefix == ctxt->str_xmlns) {
7560 /*
7561 * check that it's not a defined namespace
7562 */
7563 for (j = 1;j <= nbNs;j++)
7564 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7565 break;
7566 if (j <= nbNs) continue;
7567
7568 nsname = xmlGetNamespace(ctxt, attname);
7569 if (nsname != defaults->values[2]) {
7570 if (nsPush(ctxt, attname,
7571 defaults->values[4 * i + 2]) > 0)
7572 nbNs++;
7573 }
7574 } else {
7575 /*
7576 * check that it's not a defined attribute
7577 */
7578 for (j = 0;j < nbatts;j+=5) {
7579 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7580 break;
7581 }
7582 if (j < nbatts) continue;
7583
7584 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7585 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007586 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007587 }
7588 maxatts = ctxt->maxatts;
7589 atts = ctxt->atts;
7590 }
7591 atts[nbatts++] = attname;
7592 atts[nbatts++] = aprefix;
7593 if (aprefix == NULL)
7594 atts[nbatts++] = NULL;
7595 else
7596 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7597 atts[nbatts++] = defaults->values[4 * i + 2];
7598 atts[nbatts++] = defaults->values[4 * i + 3];
7599 nbdef++;
7600 }
7601 }
7602 }
7603 }
7604
Daniel Veillarde70c8772003-11-25 07:21:18 +00007605 /*
7606 * The attributes checkings
7607 */
7608 for (i = 0; i < nbatts;i += 5) {
7609 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7610 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7611 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7612 "Namespace prefix %s for %s on %s is not defined\n",
7613 atts[i + 1], atts[i], localname);
7614 }
7615 atts[i + 2] = nsname;
7616 /*
7617 * [ WFC: Unique Att Spec ]
7618 * No attribute name may appear more than once in the same
7619 * start-tag or empty-element tag.
7620 * As extended by the Namespace in XML REC.
7621 */
7622 for (j = 0; j < i;j += 5) {
7623 if (atts[i] == atts[j]) {
7624 if (atts[i+1] == atts[j+1]) {
7625 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7626 break;
7627 }
7628 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7629 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7630 "Namespaced Attribute %s in '%s' redefined\n",
7631 atts[i], nsname, NULL);
7632 break;
7633 }
7634 }
7635 }
7636 }
7637
Daniel Veillarde57ec792003-09-10 10:50:59 +00007638 nsname = xmlGetNamespace(ctxt, prefix);
7639 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007640 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7641 "Namespace prefix %s on %s is not defined\n",
7642 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007643 }
7644 *pref = prefix;
7645 *URI = nsname;
7646
7647 /*
7648 * SAX: Start of Element !
7649 */
7650 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7651 (!ctxt->disableSAX)) {
7652 if (nbNs > 0)
7653 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7654 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7655 nbatts / 5, nbdef, atts);
7656 else
7657 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7658 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7659 }
7660
7661 /*
7662 * Free up attribute allocated strings if needed
7663 */
7664 if (attval != 0) {
7665 for (i = 3,j = 0; j < nratts;i += 5,j++)
7666 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7667 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007668 }
7669
7670 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007671
7672base_changed:
7673 /*
7674 * the attribute strings are valid iif the base didn't changed
7675 */
7676 if (attval != 0) {
7677 for (i = 3,j = 0; j < nratts;i += 5,j++)
7678 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7679 xmlFree((xmlChar *) atts[i]);
7680 }
7681 ctxt->input->cur = ctxt->input->base + cur;
7682 if (ctxt->wellFormed == 1) {
7683 goto reparse;
7684 }
7685 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007686}
7687
7688/**
7689 * xmlParseEndTag2:
7690 * @ctxt: an XML parser context
7691 * @line: line of the start tag
7692 * @nsNr: number of namespaces on the start tag
7693 *
7694 * parse an end of tag
7695 *
7696 * [42] ETag ::= '</' Name S? '>'
7697 *
7698 * With namespace
7699 *
7700 * [NS 9] ETag ::= '</' QName S? '>'
7701 */
7702
7703static void
7704xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007705 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007706 const xmlChar *name;
7707
7708 GROW;
7709 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007710 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007711 return;
7712 }
7713 SKIP(2);
7714
William M. Brack13dfa872004-09-18 04:52:08 +00007715 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007716 if (ctxt->input->cur[tlen] == '>') {
7717 ctxt->input->cur += tlen + 1;
7718 goto done;
7719 }
7720 ctxt->input->cur += tlen;
7721 name = (xmlChar*)1;
7722 } else {
7723 if (prefix == NULL)
7724 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7725 else
7726 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7727 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007728
7729 /*
7730 * We should definitely be at the ending "S? '>'" part
7731 */
7732 GROW;
7733 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007734 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007735 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007736 } else
7737 NEXT1;
7738
7739 /*
7740 * [ WFC: Element Type Match ]
7741 * The Name in an element's end-tag must match the element type in the
7742 * start-tag.
7743 *
7744 */
7745 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007746 if (name == NULL) name = BAD_CAST "unparseable";
7747 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007748 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007749 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007750 }
7751
7752 /*
7753 * SAX: End of Tag
7754 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007755done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007756 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7757 (!ctxt->disableSAX))
7758 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7759
Daniel Veillard0fb18932003-09-07 09:14:37 +00007760 spacePop(ctxt);
7761 if (nsNr != 0)
7762 nsPop(ctxt, nsNr);
7763 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007764}
7765
7766/**
Owen Taylor3473f882001-02-23 17:55:21 +00007767 * xmlParseCDSect:
7768 * @ctxt: an XML parser context
7769 *
7770 * Parse escaped pure raw content.
7771 *
7772 * [18] CDSect ::= CDStart CData CDEnd
7773 *
7774 * [19] CDStart ::= '<![CDATA['
7775 *
7776 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7777 *
7778 * [21] CDEnd ::= ']]>'
7779 */
7780void
7781xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7782 xmlChar *buf = NULL;
7783 int len = 0;
7784 int size = XML_PARSER_BUFFER_SIZE;
7785 int r, rl;
7786 int s, sl;
7787 int cur, l;
7788 int count = 0;
7789
Daniel Veillard8f597c32003-10-06 08:19:27 +00007790 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007791 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007792 SKIP(9);
7793 } else
7794 return;
7795
7796 ctxt->instate = XML_PARSER_CDATA_SECTION;
7797 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007798 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007799 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007800 ctxt->instate = XML_PARSER_CONTENT;
7801 return;
7802 }
7803 NEXTL(rl);
7804 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007805 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007806 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007807 ctxt->instate = XML_PARSER_CONTENT;
7808 return;
7809 }
7810 NEXTL(sl);
7811 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007812 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007813 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007814 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007815 return;
7816 }
William M. Brack871611b2003-10-18 04:53:14 +00007817 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007818 ((r != ']') || (s != ']') || (cur != '>'))) {
7819 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007820 xmlChar *tmp;
7821
Owen Taylor3473f882001-02-23 17:55:21 +00007822 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00007823 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7824 if (tmp == NULL) {
7825 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007826 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007827 return;
7828 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00007829 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00007830 }
7831 COPY_BUF(rl,buf,len,r);
7832 r = s;
7833 rl = sl;
7834 s = cur;
7835 sl = l;
7836 count++;
7837 if (count > 50) {
7838 GROW;
7839 count = 0;
7840 }
7841 NEXTL(l);
7842 cur = CUR_CHAR(l);
7843 }
7844 buf[len] = 0;
7845 ctxt->instate = XML_PARSER_CONTENT;
7846 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007847 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00007848 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00007849 xmlFree(buf);
7850 return;
7851 }
7852 NEXTL(l);
7853
7854 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007855 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00007856 */
7857 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7858 if (ctxt->sax->cdataBlock != NULL)
7859 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00007860 else if (ctxt->sax->characters != NULL)
7861 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00007862 }
7863 xmlFree(buf);
7864}
7865
7866/**
7867 * xmlParseContent:
7868 * @ctxt: an XML parser context
7869 *
7870 * Parse a content:
7871 *
7872 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
7873 */
7874
7875void
7876xmlParseContent(xmlParserCtxtPtr ctxt) {
7877 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00007878 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007879 ((RAW != '<') || (NXT(1) != '/'))) {
7880 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007881 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00007882 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00007883
7884 /*
Owen Taylor3473f882001-02-23 17:55:21 +00007885 * First case : a Processing Instruction.
7886 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00007887 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007888 xmlParsePI(ctxt);
7889 }
7890
7891 /*
7892 * Second case : a CDSection
7893 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00007894 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007895 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007896 xmlParseCDSect(ctxt);
7897 }
7898
7899 /*
7900 * Third case : a comment
7901 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007902 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00007903 (NXT(2) == '-') && (NXT(3) == '-')) {
7904 xmlParseComment(ctxt);
7905 ctxt->instate = XML_PARSER_CONTENT;
7906 }
7907
7908 /*
7909 * Fourth case : a sub-element.
7910 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00007911 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00007912 xmlParseElement(ctxt);
7913 }
7914
7915 /*
7916 * Fifth case : a reference. If if has not been resolved,
7917 * parsing returns it's Name, create the node
7918 */
7919
Daniel Veillard21a0f912001-02-25 19:54:14 +00007920 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00007921 xmlParseReference(ctxt);
7922 }
7923
7924 /*
7925 * Last case, text. Note that References are handled directly.
7926 */
7927 else {
7928 xmlParseCharData(ctxt, 0);
7929 }
7930
7931 GROW;
7932 /*
7933 * Pop-up of finished entities.
7934 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00007935 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00007936 xmlPopInput(ctxt);
7937 SHRINK;
7938
Daniel Veillardfdc91562002-07-01 21:52:03 +00007939 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007940 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7941 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007942 ctxt->instate = XML_PARSER_EOF;
7943 break;
7944 }
7945 }
7946}
7947
7948/**
7949 * xmlParseElement:
7950 * @ctxt: an XML parser context
7951 *
7952 * parse an XML element, this is highly recursive
7953 *
7954 * [39] element ::= EmptyElemTag | STag content ETag
7955 *
7956 * [ WFC: Element Type Match ]
7957 * The Name in an element's end-tag must match the element type in the
7958 * start-tag.
7959 *
Owen Taylor3473f882001-02-23 17:55:21 +00007960 */
7961
7962void
7963xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007964 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007965 const xmlChar *prefix;
7966 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00007967 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007968 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00007969 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007970 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00007971
7972 /* Capture start position */
7973 if (ctxt->record_info) {
7974 node_info.begin_pos = ctxt->input->consumed +
7975 (CUR_PTR - ctxt->input->base);
7976 node_info.begin_line = ctxt->input->line;
7977 }
7978
7979 if (ctxt->spaceNr == 0)
7980 spacePush(ctxt, -1);
7981 else
7982 spacePush(ctxt, *ctxt->space);
7983
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007984 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00007985#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007986 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00007987#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007988 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00007989#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00007990 else
7991 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00007992#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007993 if (name == NULL) {
7994 spacePop(ctxt);
7995 return;
7996 }
7997 namePush(ctxt, name);
7998 ret = ctxt->node;
7999
Daniel Veillard4432df22003-09-28 18:58:27 +00008000#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008001 /*
8002 * [ VC: Root Element Type ]
8003 * The Name in the document type declaration must match the element
8004 * type of the root element.
8005 */
8006 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8007 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8008 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008009#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008010
8011 /*
8012 * Check for an Empty Element.
8013 */
8014 if ((RAW == '/') && (NXT(1) == '>')) {
8015 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008016 if (ctxt->sax2) {
8017 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8018 (!ctxt->disableSAX))
8019 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008020#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008021 } else {
8022 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8023 (!ctxt->disableSAX))
8024 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008025#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008026 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008027 namePop(ctxt);
8028 spacePop(ctxt);
8029 if (nsNr != ctxt->nsNr)
8030 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008031 if ( ret != NULL && ctxt->record_info ) {
8032 node_info.end_pos = ctxt->input->consumed +
8033 (CUR_PTR - ctxt->input->base);
8034 node_info.end_line = ctxt->input->line;
8035 node_info.node = ret;
8036 xmlParserAddNodeInfo(ctxt, &node_info);
8037 }
8038 return;
8039 }
8040 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008041 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008042 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008043 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8044 "Couldn't find end of Start Tag %s line %d\n",
8045 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008046
8047 /*
8048 * end of parsing of this node.
8049 */
8050 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008051 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008052 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008053 if (nsNr != ctxt->nsNr)
8054 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008055
8056 /*
8057 * Capture end position and add node
8058 */
8059 if ( ret != NULL && ctxt->record_info ) {
8060 node_info.end_pos = ctxt->input->consumed +
8061 (CUR_PTR - ctxt->input->base);
8062 node_info.end_line = ctxt->input->line;
8063 node_info.node = ret;
8064 xmlParserAddNodeInfo(ctxt, &node_info);
8065 }
8066 return;
8067 }
8068
8069 /*
8070 * Parse the content of the element:
8071 */
8072 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008073 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008074 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008075 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008076 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008077
8078 /*
8079 * end of parsing of this node.
8080 */
8081 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008082 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008083 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008084 if (nsNr != ctxt->nsNr)
8085 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008086 return;
8087 }
8088
8089 /*
8090 * parse the end of tag: '</' should be here.
8091 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008092 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008093 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008094 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008095 }
8096#ifdef LIBXML_SAX1_ENABLED
8097 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008098 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008099#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008100
8101 /*
8102 * Capture end position and add node
8103 */
8104 if ( ret != NULL && ctxt->record_info ) {
8105 node_info.end_pos = ctxt->input->consumed +
8106 (CUR_PTR - ctxt->input->base);
8107 node_info.end_line = ctxt->input->line;
8108 node_info.node = ret;
8109 xmlParserAddNodeInfo(ctxt, &node_info);
8110 }
8111}
8112
8113/**
8114 * xmlParseVersionNum:
8115 * @ctxt: an XML parser context
8116 *
8117 * parse the XML version value.
8118 *
8119 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8120 *
8121 * Returns the string giving the XML version number, or NULL
8122 */
8123xmlChar *
8124xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8125 xmlChar *buf = NULL;
8126 int len = 0;
8127 int size = 10;
8128 xmlChar cur;
8129
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008130 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008131 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008132 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008133 return(NULL);
8134 }
8135 cur = CUR;
8136 while (((cur >= 'a') && (cur <= 'z')) ||
8137 ((cur >= 'A') && (cur <= 'Z')) ||
8138 ((cur >= '0') && (cur <= '9')) ||
8139 (cur == '_') || (cur == '.') ||
8140 (cur == ':') || (cur == '-')) {
8141 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008142 xmlChar *tmp;
8143
Owen Taylor3473f882001-02-23 17:55:21 +00008144 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008145 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8146 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008147 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 return(NULL);
8149 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008150 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008151 }
8152 buf[len++] = cur;
8153 NEXT;
8154 cur=CUR;
8155 }
8156 buf[len] = 0;
8157 return(buf);
8158}
8159
8160/**
8161 * xmlParseVersionInfo:
8162 * @ctxt: an XML parser context
8163 *
8164 * parse the XML version.
8165 *
8166 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8167 *
8168 * [25] Eq ::= S? '=' S?
8169 *
8170 * Returns the version string, e.g. "1.0"
8171 */
8172
8173xmlChar *
8174xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8175 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008176
Daniel Veillarda07050d2003-10-19 14:46:32 +00008177 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008178 SKIP(7);
8179 SKIP_BLANKS;
8180 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008181 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008182 return(NULL);
8183 }
8184 NEXT;
8185 SKIP_BLANKS;
8186 if (RAW == '"') {
8187 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008188 version = xmlParseVersionNum(ctxt);
8189 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008190 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008191 } else
8192 NEXT;
8193 } else if (RAW == '\''){
8194 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008195 version = xmlParseVersionNum(ctxt);
8196 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008197 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008198 } else
8199 NEXT;
8200 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008201 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008202 }
8203 }
8204 return(version);
8205}
8206
8207/**
8208 * xmlParseEncName:
8209 * @ctxt: an XML parser context
8210 *
8211 * parse the XML encoding name
8212 *
8213 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8214 *
8215 * Returns the encoding name value or NULL
8216 */
8217xmlChar *
8218xmlParseEncName(xmlParserCtxtPtr ctxt) {
8219 xmlChar *buf = NULL;
8220 int len = 0;
8221 int size = 10;
8222 xmlChar cur;
8223
8224 cur = CUR;
8225 if (((cur >= 'a') && (cur <= 'z')) ||
8226 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008227 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008228 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008229 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008230 return(NULL);
8231 }
8232
8233 buf[len++] = cur;
8234 NEXT;
8235 cur = CUR;
8236 while (((cur >= 'a') && (cur <= 'z')) ||
8237 ((cur >= 'A') && (cur <= 'Z')) ||
8238 ((cur >= '0') && (cur <= '9')) ||
8239 (cur == '.') || (cur == '_') ||
8240 (cur == '-')) {
8241 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008242 xmlChar *tmp;
8243
Owen Taylor3473f882001-02-23 17:55:21 +00008244 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008245 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8246 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008247 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008248 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008249 return(NULL);
8250 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008251 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008252 }
8253 buf[len++] = cur;
8254 NEXT;
8255 cur = CUR;
8256 if (cur == 0) {
8257 SHRINK;
8258 GROW;
8259 cur = CUR;
8260 }
8261 }
8262 buf[len] = 0;
8263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008264 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008265 }
8266 return(buf);
8267}
8268
8269/**
8270 * xmlParseEncodingDecl:
8271 * @ctxt: an XML parser context
8272 *
8273 * parse the XML encoding declaration
8274 *
8275 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8276 *
8277 * this setups the conversion filters.
8278 *
8279 * Returns the encoding value or NULL
8280 */
8281
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008282const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008283xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8284 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008285
8286 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008287 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008288 SKIP(8);
8289 SKIP_BLANKS;
8290 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008291 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008292 return(NULL);
8293 }
8294 NEXT;
8295 SKIP_BLANKS;
8296 if (RAW == '"') {
8297 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008298 encoding = xmlParseEncName(ctxt);
8299 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008300 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008301 } else
8302 NEXT;
8303 } else if (RAW == '\''){
8304 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008305 encoding = xmlParseEncName(ctxt);
8306 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008307 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008308 } else
8309 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008310 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008311 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008312 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008313 /*
8314 * UTF-16 encoding stwich has already taken place at this stage,
8315 * more over the little-endian/big-endian selection is already done
8316 */
8317 if ((encoding != NULL) &&
8318 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8319 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008320 if (ctxt->encoding != NULL)
8321 xmlFree((xmlChar *) ctxt->encoding);
8322 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008323 }
8324 /*
8325 * UTF-8 encoding is handled natively
8326 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008327 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008328 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8329 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008330 if (ctxt->encoding != NULL)
8331 xmlFree((xmlChar *) ctxt->encoding);
8332 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008333 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008334 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008335 xmlCharEncodingHandlerPtr handler;
8336
8337 if (ctxt->input->encoding != NULL)
8338 xmlFree((xmlChar *) ctxt->input->encoding);
8339 ctxt->input->encoding = encoding;
8340
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008341 handler = xmlFindCharEncodingHandler((const char *) encoding);
8342 if (handler != NULL) {
8343 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008344 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008345 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008346 "Unsupported encoding %s\n", encoding);
8347 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008348 }
8349 }
8350 }
8351 return(encoding);
8352}
8353
8354/**
8355 * xmlParseSDDecl:
8356 * @ctxt: an XML parser context
8357 *
8358 * parse the XML standalone declaration
8359 *
8360 * [32] SDDecl ::= S 'standalone' Eq
8361 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8362 *
8363 * [ VC: Standalone Document Declaration ]
8364 * TODO The standalone document declaration must have the value "no"
8365 * if any external markup declarations contain declarations of:
8366 * - attributes with default values, if elements to which these
8367 * attributes apply appear in the document without specifications
8368 * of values for these attributes, or
8369 * - entities (other than amp, lt, gt, apos, quot), if references
8370 * to those entities appear in the document, or
8371 * - attributes with values subject to normalization, where the
8372 * attribute appears in the document with a value which will change
8373 * as a result of normalization, or
8374 * - element types with element content, if white space occurs directly
8375 * within any instance of those types.
8376 *
8377 * Returns 1 if standalone, 0 otherwise
8378 */
8379
8380int
8381xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8382 int standalone = -1;
8383
8384 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008385 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008386 SKIP(10);
8387 SKIP_BLANKS;
8388 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008389 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008390 return(standalone);
8391 }
8392 NEXT;
8393 SKIP_BLANKS;
8394 if (RAW == '\''){
8395 NEXT;
8396 if ((RAW == 'n') && (NXT(1) == 'o')) {
8397 standalone = 0;
8398 SKIP(2);
8399 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8400 (NXT(2) == 's')) {
8401 standalone = 1;
8402 SKIP(3);
8403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008404 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008405 }
8406 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008407 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008408 } else
8409 NEXT;
8410 } else if (RAW == '"'){
8411 NEXT;
8412 if ((RAW == 'n') && (NXT(1) == 'o')) {
8413 standalone = 0;
8414 SKIP(2);
8415 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8416 (NXT(2) == 's')) {
8417 standalone = 1;
8418 SKIP(3);
8419 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008420 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008421 }
8422 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008423 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008424 } else
8425 NEXT;
8426 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008427 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008428 }
8429 }
8430 return(standalone);
8431}
8432
8433/**
8434 * xmlParseXMLDecl:
8435 * @ctxt: an XML parser context
8436 *
8437 * parse an XML declaration header
8438 *
8439 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8440 */
8441
8442void
8443xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8444 xmlChar *version;
8445
8446 /*
8447 * We know that '<?xml' is here.
8448 */
8449 SKIP(5);
8450
William M. Brack76e95df2003-10-18 16:20:14 +00008451 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8453 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008454 }
8455 SKIP_BLANKS;
8456
8457 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008458 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008459 */
8460 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008461 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008462 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008463 } else {
8464 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8465 /*
8466 * TODO: Blueberry should be detected here
8467 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008468 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8469 "Unsupported version '%s'\n",
8470 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008471 }
8472 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008473 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008474 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008475 }
Owen Taylor3473f882001-02-23 17:55:21 +00008476
8477 /*
8478 * We may have the encoding declaration
8479 */
William M. Brack76e95df2003-10-18 16:20:14 +00008480 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008481 if ((RAW == '?') && (NXT(1) == '>')) {
8482 SKIP(2);
8483 return;
8484 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008485 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008486 }
8487 xmlParseEncodingDecl(ctxt);
8488 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8489 /*
8490 * The XML REC instructs us to stop parsing right here
8491 */
8492 return;
8493 }
8494
8495 /*
8496 * We may have the standalone status.
8497 */
William M. Brack76e95df2003-10-18 16:20:14 +00008498 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008499 if ((RAW == '?') && (NXT(1) == '>')) {
8500 SKIP(2);
8501 return;
8502 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008504 }
8505 SKIP_BLANKS;
8506 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8507
8508 SKIP_BLANKS;
8509 if ((RAW == '?') && (NXT(1) == '>')) {
8510 SKIP(2);
8511 } else if (RAW == '>') {
8512 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008513 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008514 NEXT;
8515 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008516 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008517 MOVETO_ENDTAG(CUR_PTR);
8518 NEXT;
8519 }
8520}
8521
8522/**
8523 * xmlParseMisc:
8524 * @ctxt: an XML parser context
8525 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008526 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008527 *
8528 * [27] Misc ::= Comment | PI | S
8529 */
8530
8531void
8532xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008533 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008534 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008535 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008536 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008537 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008538 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008539 NEXT;
8540 } else
8541 xmlParseComment(ctxt);
8542 }
8543}
8544
8545/**
8546 * xmlParseDocument:
8547 * @ctxt: an XML parser context
8548 *
8549 * parse an XML document (and build a tree if using the standard SAX
8550 * interface).
8551 *
8552 * [1] document ::= prolog element Misc*
8553 *
8554 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8555 *
8556 * Returns 0, -1 in case of error. the parser context is augmented
8557 * as a result of the parsing.
8558 */
8559
8560int
8561xmlParseDocument(xmlParserCtxtPtr ctxt) {
8562 xmlChar start[4];
8563 xmlCharEncoding enc;
8564
8565 xmlInitParser();
8566
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008567 if ((ctxt == NULL) || (ctxt->input == NULL))
8568 return(-1);
8569
Owen Taylor3473f882001-02-23 17:55:21 +00008570 GROW;
8571
8572 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573 * SAX: detecting the level.
8574 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008575 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008576
8577 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008578 * SAX: beginning of the document processing.
8579 */
8580 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8581 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8582
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008583 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8584 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008585 /*
8586 * Get the 4 first bytes and decode the charset
8587 * if enc != XML_CHAR_ENCODING_NONE
8588 * plug some encoding conversion routines.
8589 */
8590 start[0] = RAW;
8591 start[1] = NXT(1);
8592 start[2] = NXT(2);
8593 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008594 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008595 if (enc != XML_CHAR_ENCODING_NONE) {
8596 xmlSwitchEncoding(ctxt, enc);
8597 }
Owen Taylor3473f882001-02-23 17:55:21 +00008598 }
8599
8600
8601 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008602 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 }
8604
8605 /*
8606 * Check for the XMLDecl in the Prolog.
8607 */
8608 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008609 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008610
8611 /*
8612 * Note that we will switch encoding on the fly.
8613 */
8614 xmlParseXMLDecl(ctxt);
8615 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8616 /*
8617 * The XML REC instructs us to stop parsing right here
8618 */
8619 return(-1);
8620 }
8621 ctxt->standalone = ctxt->input->standalone;
8622 SKIP_BLANKS;
8623 } else {
8624 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8625 }
8626 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8627 ctxt->sax->startDocument(ctxt->userData);
8628
8629 /*
8630 * The Misc part of the Prolog
8631 */
8632 GROW;
8633 xmlParseMisc(ctxt);
8634
8635 /*
8636 * Then possibly doc type declaration(s) and more Misc
8637 * (doctypedecl Misc*)?
8638 */
8639 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008640 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008641
8642 ctxt->inSubset = 1;
8643 xmlParseDocTypeDecl(ctxt);
8644 if (RAW == '[') {
8645 ctxt->instate = XML_PARSER_DTD;
8646 xmlParseInternalSubset(ctxt);
8647 }
8648
8649 /*
8650 * Create and update the external subset.
8651 */
8652 ctxt->inSubset = 2;
8653 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8654 (!ctxt->disableSAX))
8655 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8656 ctxt->extSubSystem, ctxt->extSubURI);
8657 ctxt->inSubset = 0;
8658
8659
8660 ctxt->instate = XML_PARSER_PROLOG;
8661 xmlParseMisc(ctxt);
8662 }
8663
8664 /*
8665 * Time to start parsing the tree itself
8666 */
8667 GROW;
8668 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008669 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8670 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008671 } else {
8672 ctxt->instate = XML_PARSER_CONTENT;
8673 xmlParseElement(ctxt);
8674 ctxt->instate = XML_PARSER_EPILOG;
8675
8676
8677 /*
8678 * The Misc part at the end
8679 */
8680 xmlParseMisc(ctxt);
8681
Daniel Veillard561b7f82002-03-20 21:55:57 +00008682 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008683 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008684 }
8685 ctxt->instate = XML_PARSER_EOF;
8686 }
8687
8688 /*
8689 * SAX: end of the document processing.
8690 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008691 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008692 ctxt->sax->endDocument(ctxt->userData);
8693
Daniel Veillard5997aca2002-03-18 18:36:20 +00008694 /*
8695 * Remove locally kept entity definitions if the tree was not built
8696 */
8697 if ((ctxt->myDoc != NULL) &&
8698 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8699 xmlFreeDoc(ctxt->myDoc);
8700 ctxt->myDoc = NULL;
8701 }
8702
Daniel Veillardc7612992002-02-17 22:47:37 +00008703 if (! ctxt->wellFormed) {
8704 ctxt->valid = 0;
8705 return(-1);
8706 }
Owen Taylor3473f882001-02-23 17:55:21 +00008707 return(0);
8708}
8709
8710/**
8711 * xmlParseExtParsedEnt:
8712 * @ctxt: an XML parser context
8713 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008714 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008715 * An external general parsed entity is well-formed if it matches the
8716 * production labeled extParsedEnt.
8717 *
8718 * [78] extParsedEnt ::= TextDecl? content
8719 *
8720 * Returns 0, -1 in case of error. the parser context is augmented
8721 * as a result of the parsing.
8722 */
8723
8724int
8725xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8726 xmlChar start[4];
8727 xmlCharEncoding enc;
8728
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008729 if ((ctxt == NULL) || (ctxt->input == NULL))
8730 return(-1);
8731
Owen Taylor3473f882001-02-23 17:55:21 +00008732 xmlDefaultSAXHandlerInit();
8733
Daniel Veillard309f81d2003-09-23 09:02:53 +00008734 xmlDetectSAX2(ctxt);
8735
Owen Taylor3473f882001-02-23 17:55:21 +00008736 GROW;
8737
8738 /*
8739 * SAX: beginning of the document processing.
8740 */
8741 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8742 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8743
8744 /*
8745 * Get the 4 first bytes and decode the charset
8746 * if enc != XML_CHAR_ENCODING_NONE
8747 * plug some encoding conversion routines.
8748 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008749 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8750 start[0] = RAW;
8751 start[1] = NXT(1);
8752 start[2] = NXT(2);
8753 start[3] = NXT(3);
8754 enc = xmlDetectCharEncoding(start, 4);
8755 if (enc != XML_CHAR_ENCODING_NONE) {
8756 xmlSwitchEncoding(ctxt, enc);
8757 }
Owen Taylor3473f882001-02-23 17:55:21 +00008758 }
8759
8760
8761 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008762 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008763 }
8764
8765 /*
8766 * Check for the XMLDecl in the Prolog.
8767 */
8768 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008769 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008770
8771 /*
8772 * Note that we will switch encoding on the fly.
8773 */
8774 xmlParseXMLDecl(ctxt);
8775 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8776 /*
8777 * The XML REC instructs us to stop parsing right here
8778 */
8779 return(-1);
8780 }
8781 SKIP_BLANKS;
8782 } else {
8783 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8784 }
8785 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8786 ctxt->sax->startDocument(ctxt->userData);
8787
8788 /*
8789 * Doing validity checking on chunk doesn't make sense
8790 */
8791 ctxt->instate = XML_PARSER_CONTENT;
8792 ctxt->validate = 0;
8793 ctxt->loadsubset = 0;
8794 ctxt->depth = 0;
8795
8796 xmlParseContent(ctxt);
8797
8798 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008799 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008800 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008801 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008802 }
8803
8804 /*
8805 * SAX: end of the document processing.
8806 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008807 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008808 ctxt->sax->endDocument(ctxt->userData);
8809
8810 if (! ctxt->wellFormed) return(-1);
8811 return(0);
8812}
8813
Daniel Veillard73b013f2003-09-30 12:36:01 +00008814#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008815/************************************************************************
8816 * *
8817 * Progressive parsing interfaces *
8818 * *
8819 ************************************************************************/
8820
8821/**
8822 * xmlParseLookupSequence:
8823 * @ctxt: an XML parser context
8824 * @first: the first char to lookup
8825 * @next: the next char to lookup or zero
8826 * @third: the next char to lookup or zero
8827 *
8828 * Try to find if a sequence (first, next, third) or just (first next) or
8829 * (first) is available in the input stream.
8830 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
8831 * to avoid rescanning sequences of bytes, it DOES change the state of the
8832 * parser, do not use liberally.
8833 *
8834 * Returns the index to the current parsing point if the full sequence
8835 * is available, -1 otherwise.
8836 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008837static int
Owen Taylor3473f882001-02-23 17:55:21 +00008838xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
8839 xmlChar next, xmlChar third) {
8840 int base, len;
8841 xmlParserInputPtr in;
8842 const xmlChar *buf;
8843
8844 in = ctxt->input;
8845 if (in == NULL) return(-1);
8846 base = in->cur - in->base;
8847 if (base < 0) return(-1);
8848 if (ctxt->checkIndex > base)
8849 base = ctxt->checkIndex;
8850 if (in->buf == NULL) {
8851 buf = in->base;
8852 len = in->length;
8853 } else {
8854 buf = in->buf->buffer->content;
8855 len = in->buf->buffer->use;
8856 }
8857 /* take into account the sequence length */
8858 if (third) len -= 2;
8859 else if (next) len --;
8860 for (;base < len;base++) {
8861 if (buf[base] == first) {
8862 if (third != 0) {
8863 if ((buf[base + 1] != next) ||
8864 (buf[base + 2] != third)) continue;
8865 } else if (next != 0) {
8866 if (buf[base + 1] != next) continue;
8867 }
8868 ctxt->checkIndex = 0;
8869#ifdef DEBUG_PUSH
8870 if (next == 0)
8871 xmlGenericError(xmlGenericErrorContext,
8872 "PP: lookup '%c' found at %d\n",
8873 first, base);
8874 else if (third == 0)
8875 xmlGenericError(xmlGenericErrorContext,
8876 "PP: lookup '%c%c' found at %d\n",
8877 first, next, base);
8878 else
8879 xmlGenericError(xmlGenericErrorContext,
8880 "PP: lookup '%c%c%c' found at %d\n",
8881 first, next, third, base);
8882#endif
8883 return(base - (in->cur - in->base));
8884 }
8885 }
8886 ctxt->checkIndex = base;
8887#ifdef DEBUG_PUSH
8888 if (next == 0)
8889 xmlGenericError(xmlGenericErrorContext,
8890 "PP: lookup '%c' failed\n", first);
8891 else if (third == 0)
8892 xmlGenericError(xmlGenericErrorContext,
8893 "PP: lookup '%c%c' failed\n", first, next);
8894 else
8895 xmlGenericError(xmlGenericErrorContext,
8896 "PP: lookup '%c%c%c' failed\n", first, next, third);
8897#endif
8898 return(-1);
8899}
8900
8901/**
Daniel Veillarda880b122003-04-21 21:36:41 +00008902 * xmlParseGetLasts:
8903 * @ctxt: an XML parser context
8904 * @lastlt: pointer to store the last '<' from the input
8905 * @lastgt: pointer to store the last '>' from the input
8906 *
8907 * Lookup the last < and > in the current chunk
8908 */
8909static void
8910xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
8911 const xmlChar **lastgt) {
8912 const xmlChar *tmp;
8913
8914 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
8915 xmlGenericError(xmlGenericErrorContext,
8916 "Internal error: xmlParseGetLasts\n");
8917 return;
8918 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00008919 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00008920 tmp = ctxt->input->end;
8921 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00008922 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00008923 if (tmp < ctxt->input->base) {
8924 *lastlt = NULL;
8925 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00008926 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00008927 *lastlt = tmp;
8928 tmp++;
8929 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
8930 if (*tmp == '\'') {
8931 tmp++;
8932 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
8933 if (tmp < ctxt->input->end) tmp++;
8934 } else if (*tmp == '"') {
8935 tmp++;
8936 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
8937 if (tmp < ctxt->input->end) tmp++;
8938 } else
8939 tmp++;
8940 }
8941 if (tmp < ctxt->input->end)
8942 *lastgt = tmp;
8943 else {
8944 tmp = *lastlt;
8945 tmp--;
8946 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
8947 if (tmp >= ctxt->input->base)
8948 *lastgt = tmp;
8949 else
8950 *lastgt = NULL;
8951 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008952 }
Daniel Veillarda880b122003-04-21 21:36:41 +00008953 } else {
8954 *lastlt = NULL;
8955 *lastgt = NULL;
8956 }
8957}
8958/**
Owen Taylor3473f882001-02-23 17:55:21 +00008959 * xmlParseTryOrFinish:
8960 * @ctxt: an XML parser context
8961 * @terminate: last chunk indicator
8962 *
8963 * Try to progress on parsing
8964 *
8965 * Returns zero if no parsing was possible
8966 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008967static int
Owen Taylor3473f882001-02-23 17:55:21 +00008968xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
8969 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008970 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008971 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00008972 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00008973
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008974 if (ctxt->input == NULL)
8975 return(0);
8976
Owen Taylor3473f882001-02-23 17:55:21 +00008977#ifdef DEBUG_PUSH
8978 switch (ctxt->instate) {
8979 case XML_PARSER_EOF:
8980 xmlGenericError(xmlGenericErrorContext,
8981 "PP: try EOF\n"); break;
8982 case XML_PARSER_START:
8983 xmlGenericError(xmlGenericErrorContext,
8984 "PP: try START\n"); break;
8985 case XML_PARSER_MISC:
8986 xmlGenericError(xmlGenericErrorContext,
8987 "PP: try MISC\n");break;
8988 case XML_PARSER_COMMENT:
8989 xmlGenericError(xmlGenericErrorContext,
8990 "PP: try COMMENT\n");break;
8991 case XML_PARSER_PROLOG:
8992 xmlGenericError(xmlGenericErrorContext,
8993 "PP: try PROLOG\n");break;
8994 case XML_PARSER_START_TAG:
8995 xmlGenericError(xmlGenericErrorContext,
8996 "PP: try START_TAG\n");break;
8997 case XML_PARSER_CONTENT:
8998 xmlGenericError(xmlGenericErrorContext,
8999 "PP: try CONTENT\n");break;
9000 case XML_PARSER_CDATA_SECTION:
9001 xmlGenericError(xmlGenericErrorContext,
9002 "PP: try CDATA_SECTION\n");break;
9003 case XML_PARSER_END_TAG:
9004 xmlGenericError(xmlGenericErrorContext,
9005 "PP: try END_TAG\n");break;
9006 case XML_PARSER_ENTITY_DECL:
9007 xmlGenericError(xmlGenericErrorContext,
9008 "PP: try ENTITY_DECL\n");break;
9009 case XML_PARSER_ENTITY_VALUE:
9010 xmlGenericError(xmlGenericErrorContext,
9011 "PP: try ENTITY_VALUE\n");break;
9012 case XML_PARSER_ATTRIBUTE_VALUE:
9013 xmlGenericError(xmlGenericErrorContext,
9014 "PP: try ATTRIBUTE_VALUE\n");break;
9015 case XML_PARSER_DTD:
9016 xmlGenericError(xmlGenericErrorContext,
9017 "PP: try DTD\n");break;
9018 case XML_PARSER_EPILOG:
9019 xmlGenericError(xmlGenericErrorContext,
9020 "PP: try EPILOG\n");break;
9021 case XML_PARSER_PI:
9022 xmlGenericError(xmlGenericErrorContext,
9023 "PP: try PI\n");break;
9024 case XML_PARSER_IGNORE:
9025 xmlGenericError(xmlGenericErrorContext,
9026 "PP: try IGNORE\n");break;
9027 }
9028#endif
9029
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009030 if ((ctxt->input != NULL) &&
9031 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009032 xmlSHRINK(ctxt);
9033 ctxt->checkIndex = 0;
9034 }
9035 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009036
Daniel Veillarda880b122003-04-21 21:36:41 +00009037 while (1) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009038 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9039 return(0);
9040
9041
Owen Taylor3473f882001-02-23 17:55:21 +00009042 /*
9043 * Pop-up of finished entities.
9044 */
9045 while ((RAW == 0) && (ctxt->inputNr > 1))
9046 xmlPopInput(ctxt);
9047
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009048 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009049 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009050 avail = ctxt->input->length -
9051 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009052 else {
9053 /*
9054 * If we are operating on converted input, try to flush
9055 * remaining chars to avoid them stalling in the non-converted
9056 * buffer.
9057 */
9058 if ((ctxt->input->buf->raw != NULL) &&
9059 (ctxt->input->buf->raw->use > 0)) {
9060 int base = ctxt->input->base -
9061 ctxt->input->buf->buffer->content;
9062 int current = ctxt->input->cur - ctxt->input->base;
9063
9064 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9065 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9066 ctxt->input->cur = ctxt->input->base + current;
9067 ctxt->input->end =
9068 &ctxt->input->buf->buffer->content[
9069 ctxt->input->buf->buffer->use];
9070 }
9071 avail = ctxt->input->buf->buffer->use -
9072 (ctxt->input->cur - ctxt->input->base);
9073 }
Owen Taylor3473f882001-02-23 17:55:21 +00009074 if (avail < 1)
9075 goto done;
9076 switch (ctxt->instate) {
9077 case XML_PARSER_EOF:
9078 /*
9079 * Document parsing is done !
9080 */
9081 goto done;
9082 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009083 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9084 xmlChar start[4];
9085 xmlCharEncoding enc;
9086
9087 /*
9088 * Very first chars read from the document flow.
9089 */
9090 if (avail < 4)
9091 goto done;
9092
9093 /*
9094 * Get the 4 first bytes and decode the charset
9095 * if enc != XML_CHAR_ENCODING_NONE
9096 * plug some encoding conversion routines.
9097 */
9098 start[0] = RAW;
9099 start[1] = NXT(1);
9100 start[2] = NXT(2);
9101 start[3] = NXT(3);
9102 enc = xmlDetectCharEncoding(start, 4);
9103 if (enc != XML_CHAR_ENCODING_NONE) {
9104 xmlSwitchEncoding(ctxt, enc);
9105 }
9106 break;
9107 }
Owen Taylor3473f882001-02-23 17:55:21 +00009108
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009109 if (avail < 2)
9110 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009111 cur = ctxt->input->cur[0];
9112 next = ctxt->input->cur[1];
9113 if (cur == 0) {
9114 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9115 ctxt->sax->setDocumentLocator(ctxt->userData,
9116 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009117 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009118 ctxt->instate = XML_PARSER_EOF;
9119#ifdef DEBUG_PUSH
9120 xmlGenericError(xmlGenericErrorContext,
9121 "PP: entering EOF\n");
9122#endif
9123 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9124 ctxt->sax->endDocument(ctxt->userData);
9125 goto done;
9126 }
9127 if ((cur == '<') && (next == '?')) {
9128 /* PI or XML decl */
9129 if (avail < 5) return(ret);
9130 if ((!terminate) &&
9131 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9132 return(ret);
9133 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9134 ctxt->sax->setDocumentLocator(ctxt->userData,
9135 &xmlDefaultSAXLocator);
9136 if ((ctxt->input->cur[2] == 'x') &&
9137 (ctxt->input->cur[3] == 'm') &&
9138 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009139 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009140 ret += 5;
9141#ifdef DEBUG_PUSH
9142 xmlGenericError(xmlGenericErrorContext,
9143 "PP: Parsing XML Decl\n");
9144#endif
9145 xmlParseXMLDecl(ctxt);
9146 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9147 /*
9148 * The XML REC instructs us to stop parsing right
9149 * here
9150 */
9151 ctxt->instate = XML_PARSER_EOF;
9152 return(0);
9153 }
9154 ctxt->standalone = ctxt->input->standalone;
9155 if ((ctxt->encoding == NULL) &&
9156 (ctxt->input->encoding != NULL))
9157 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9158 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9159 (!ctxt->disableSAX))
9160 ctxt->sax->startDocument(ctxt->userData);
9161 ctxt->instate = XML_PARSER_MISC;
9162#ifdef DEBUG_PUSH
9163 xmlGenericError(xmlGenericErrorContext,
9164 "PP: entering MISC\n");
9165#endif
9166 } else {
9167 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9168 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9169 (!ctxt->disableSAX))
9170 ctxt->sax->startDocument(ctxt->userData);
9171 ctxt->instate = XML_PARSER_MISC;
9172#ifdef DEBUG_PUSH
9173 xmlGenericError(xmlGenericErrorContext,
9174 "PP: entering MISC\n");
9175#endif
9176 }
9177 } else {
9178 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9179 ctxt->sax->setDocumentLocator(ctxt->userData,
9180 &xmlDefaultSAXLocator);
9181 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009182 if (ctxt->version == NULL) {
9183 xmlErrMemory(ctxt, NULL);
9184 break;
9185 }
Owen Taylor3473f882001-02-23 17:55:21 +00009186 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9187 (!ctxt->disableSAX))
9188 ctxt->sax->startDocument(ctxt->userData);
9189 ctxt->instate = XML_PARSER_MISC;
9190#ifdef DEBUG_PUSH
9191 xmlGenericError(xmlGenericErrorContext,
9192 "PP: entering MISC\n");
9193#endif
9194 }
9195 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009196 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009197 const xmlChar *name;
9198 const xmlChar *prefix;
9199 const xmlChar *URI;
9200 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009201
9202 if ((avail < 2) && (ctxt->inputNr == 1))
9203 goto done;
9204 cur = ctxt->input->cur[0];
9205 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009206 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009207 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009208 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9209 ctxt->sax->endDocument(ctxt->userData);
9210 goto done;
9211 }
9212 if (!terminate) {
9213 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009214 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009215 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009216 goto done;
9217 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9218 goto done;
9219 }
9220 }
9221 if (ctxt->spaceNr == 0)
9222 spacePush(ctxt, -1);
9223 else
9224 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009225#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009226 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009227#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009228 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009229#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009230 else
9231 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009232#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009233 if (name == NULL) {
9234 spacePop(ctxt);
9235 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009236 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9237 ctxt->sax->endDocument(ctxt->userData);
9238 goto done;
9239 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009240#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009241 /*
9242 * [ VC: Root Element Type ]
9243 * The Name in the document type declaration must match
9244 * the element type of the root element.
9245 */
9246 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9247 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9248 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009249#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009250
9251 /*
9252 * Check for an Empty Element.
9253 */
9254 if ((RAW == '/') && (NXT(1) == '>')) {
9255 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009256
9257 if (ctxt->sax2) {
9258 if ((ctxt->sax != NULL) &&
9259 (ctxt->sax->endElementNs != NULL) &&
9260 (!ctxt->disableSAX))
9261 ctxt->sax->endElementNs(ctxt->userData, name,
9262 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009263 if (ctxt->nsNr - nsNr > 0)
9264 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009265#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009266 } else {
9267 if ((ctxt->sax != NULL) &&
9268 (ctxt->sax->endElement != NULL) &&
9269 (!ctxt->disableSAX))
9270 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009271#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009272 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009273 spacePop(ctxt);
9274 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009275 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009276 } else {
9277 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009278 }
9279 break;
9280 }
9281 if (RAW == '>') {
9282 NEXT;
9283 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009284 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009285 "Couldn't find end of Start Tag %s\n",
9286 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009287 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009288 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009289 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009290 if (ctxt->sax2)
9291 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009292#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009293 else
9294 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009295#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009296
Daniel Veillarda880b122003-04-21 21:36:41 +00009297 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009298 break;
9299 }
9300 case XML_PARSER_CONTENT: {
9301 const xmlChar *test;
9302 unsigned int cons;
9303 if ((avail < 2) && (ctxt->inputNr == 1))
9304 goto done;
9305 cur = ctxt->input->cur[0];
9306 next = ctxt->input->cur[1];
9307
9308 test = CUR_PTR;
9309 cons = ctxt->input->consumed;
9310 if ((cur == '<') && (next == '/')) {
9311 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009312 break;
9313 } else if ((cur == '<') && (next == '?')) {
9314 if ((!terminate) &&
9315 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9316 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009317 xmlParsePI(ctxt);
9318 } else if ((cur == '<') && (next != '!')) {
9319 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009320 break;
9321 } else if ((cur == '<') && (next == '!') &&
9322 (ctxt->input->cur[2] == '-') &&
9323 (ctxt->input->cur[3] == '-')) {
9324 if ((!terminate) &&
9325 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9326 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009327 xmlParseComment(ctxt);
9328 ctxt->instate = XML_PARSER_CONTENT;
9329 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9330 (ctxt->input->cur[2] == '[') &&
9331 (ctxt->input->cur[3] == 'C') &&
9332 (ctxt->input->cur[4] == 'D') &&
9333 (ctxt->input->cur[5] == 'A') &&
9334 (ctxt->input->cur[6] == 'T') &&
9335 (ctxt->input->cur[7] == 'A') &&
9336 (ctxt->input->cur[8] == '[')) {
9337 SKIP(9);
9338 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009339 break;
9340 } else if ((cur == '<') && (next == '!') &&
9341 (avail < 9)) {
9342 goto done;
9343 } else if (cur == '&') {
9344 if ((!terminate) &&
9345 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9346 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009347 xmlParseReference(ctxt);
9348 } else {
9349 /* TODO Avoid the extra copy, handle directly !!! */
9350 /*
9351 * Goal of the following test is:
9352 * - minimize calls to the SAX 'character' callback
9353 * when they are mergeable
9354 * - handle a problem for isBlank when we only parse
9355 * a sequence of blank chars and the next one is
9356 * not available to check against '<' presence.
9357 * - tries to homogenize the differences in SAX
9358 * callbacks between the push and pull versions
9359 * of the parser.
9360 */
9361 if ((ctxt->inputNr == 1) &&
9362 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9363 if (!terminate) {
9364 if (ctxt->progressive) {
9365 if ((lastlt == NULL) ||
9366 (ctxt->input->cur > lastlt))
9367 goto done;
9368 } else if (xmlParseLookupSequence(ctxt,
9369 '<', 0, 0) < 0) {
9370 goto done;
9371 }
9372 }
9373 }
9374 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009375 xmlParseCharData(ctxt, 0);
9376 }
9377 /*
9378 * Pop-up of finished entities.
9379 */
9380 while ((RAW == 0) && (ctxt->inputNr > 1))
9381 xmlPopInput(ctxt);
9382 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009383 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9384 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009385 ctxt->instate = XML_PARSER_EOF;
9386 break;
9387 }
9388 break;
9389 }
9390 case XML_PARSER_END_TAG:
9391 if (avail < 2)
9392 goto done;
9393 if (!terminate) {
9394 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009395 /* > can be found unescaped in attribute values */
9396 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009397 goto done;
9398 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9399 goto done;
9400 }
9401 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009402 if (ctxt->sax2) {
9403 xmlParseEndTag2(ctxt,
9404 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9405 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009406 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009407 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009408 }
9409#ifdef LIBXML_SAX1_ENABLED
9410 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009411 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009412#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009413 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009414 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009415 } else {
9416 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009417 }
9418 break;
9419 case XML_PARSER_CDATA_SECTION: {
9420 /*
9421 * The Push mode need to have the SAX callback for
9422 * cdataBlock merge back contiguous callbacks.
9423 */
9424 int base;
9425
9426 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9427 if (base < 0) {
9428 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9430 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009431 ctxt->sax->cdataBlock(ctxt->userData,
9432 ctxt->input->cur,
9433 XML_PARSER_BIG_BUFFER_SIZE);
9434 else if (ctxt->sax->characters != NULL)
9435 ctxt->sax->characters(ctxt->userData,
9436 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009437 XML_PARSER_BIG_BUFFER_SIZE);
9438 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009439 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009440 ctxt->checkIndex = 0;
9441 }
9442 goto done;
9443 } else {
9444 if ((ctxt->sax != NULL) && (base > 0) &&
9445 (!ctxt->disableSAX)) {
9446 if (ctxt->sax->cdataBlock != NULL)
9447 ctxt->sax->cdataBlock(ctxt->userData,
9448 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009449 else if (ctxt->sax->characters != NULL)
9450 ctxt->sax->characters(ctxt->userData,
9451 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009452 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009453 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009454 ctxt->checkIndex = 0;
9455 ctxt->instate = XML_PARSER_CONTENT;
9456#ifdef DEBUG_PUSH
9457 xmlGenericError(xmlGenericErrorContext,
9458 "PP: entering CONTENT\n");
9459#endif
9460 }
9461 break;
9462 }
Owen Taylor3473f882001-02-23 17:55:21 +00009463 case XML_PARSER_MISC:
9464 SKIP_BLANKS;
9465 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 avail = ctxt->input->length -
9467 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009468 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009469 avail = ctxt->input->buf->buffer->use -
9470 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009471 if (avail < 2)
9472 goto done;
9473 cur = ctxt->input->cur[0];
9474 next = ctxt->input->cur[1];
9475 if ((cur == '<') && (next == '?')) {
9476 if ((!terminate) &&
9477 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9478 goto done;
9479#ifdef DEBUG_PUSH
9480 xmlGenericError(xmlGenericErrorContext,
9481 "PP: Parsing PI\n");
9482#endif
9483 xmlParsePI(ctxt);
9484 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009485 (ctxt->input->cur[2] == '-') &&
9486 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009487 if ((!terminate) &&
9488 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9489 goto done;
9490#ifdef DEBUG_PUSH
9491 xmlGenericError(xmlGenericErrorContext,
9492 "PP: Parsing Comment\n");
9493#endif
9494 xmlParseComment(ctxt);
9495 ctxt->instate = XML_PARSER_MISC;
9496 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009497 (ctxt->input->cur[2] == 'D') &&
9498 (ctxt->input->cur[3] == 'O') &&
9499 (ctxt->input->cur[4] == 'C') &&
9500 (ctxt->input->cur[5] == 'T') &&
9501 (ctxt->input->cur[6] == 'Y') &&
9502 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009503 (ctxt->input->cur[8] == 'E')) {
9504 if ((!terminate) &&
9505 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9506 goto done;
9507#ifdef DEBUG_PUSH
9508 xmlGenericError(xmlGenericErrorContext,
9509 "PP: Parsing internal subset\n");
9510#endif
9511 ctxt->inSubset = 1;
9512 xmlParseDocTypeDecl(ctxt);
9513 if (RAW == '[') {
9514 ctxt->instate = XML_PARSER_DTD;
9515#ifdef DEBUG_PUSH
9516 xmlGenericError(xmlGenericErrorContext,
9517 "PP: entering DTD\n");
9518#endif
9519 } else {
9520 /*
9521 * Create and update the external subset.
9522 */
9523 ctxt->inSubset = 2;
9524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9525 (ctxt->sax->externalSubset != NULL))
9526 ctxt->sax->externalSubset(ctxt->userData,
9527 ctxt->intSubName, ctxt->extSubSystem,
9528 ctxt->extSubURI);
9529 ctxt->inSubset = 0;
9530 ctxt->instate = XML_PARSER_PROLOG;
9531#ifdef DEBUG_PUSH
9532 xmlGenericError(xmlGenericErrorContext,
9533 "PP: entering PROLOG\n");
9534#endif
9535 }
9536 } else if ((cur == '<') && (next == '!') &&
9537 (avail < 9)) {
9538 goto done;
9539 } else {
9540 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009541 ctxt->progressive = 1;
9542 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009543#ifdef DEBUG_PUSH
9544 xmlGenericError(xmlGenericErrorContext,
9545 "PP: entering START_TAG\n");
9546#endif
9547 }
9548 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009549 case XML_PARSER_PROLOG:
9550 SKIP_BLANKS;
9551 if (ctxt->input->buf == NULL)
9552 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9553 else
9554 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9555 if (avail < 2)
9556 goto done;
9557 cur = ctxt->input->cur[0];
9558 next = ctxt->input->cur[1];
9559 if ((cur == '<') && (next == '?')) {
9560 if ((!terminate) &&
9561 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9562 goto done;
9563#ifdef DEBUG_PUSH
9564 xmlGenericError(xmlGenericErrorContext,
9565 "PP: Parsing PI\n");
9566#endif
9567 xmlParsePI(ctxt);
9568 } else if ((cur == '<') && (next == '!') &&
9569 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9570 if ((!terminate) &&
9571 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9572 goto done;
9573#ifdef DEBUG_PUSH
9574 xmlGenericError(xmlGenericErrorContext,
9575 "PP: Parsing Comment\n");
9576#endif
9577 xmlParseComment(ctxt);
9578 ctxt->instate = XML_PARSER_PROLOG;
9579 } else if ((cur == '<') && (next == '!') &&
9580 (avail < 4)) {
9581 goto done;
9582 } else {
9583 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009584 if (ctxt->progressive == 0)
9585 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009586 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009587#ifdef DEBUG_PUSH
9588 xmlGenericError(xmlGenericErrorContext,
9589 "PP: entering START_TAG\n");
9590#endif
9591 }
9592 break;
9593 case XML_PARSER_EPILOG:
9594 SKIP_BLANKS;
9595 if (ctxt->input->buf == NULL)
9596 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9597 else
9598 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9599 if (avail < 2)
9600 goto done;
9601 cur = ctxt->input->cur[0];
9602 next = ctxt->input->cur[1];
9603 if ((cur == '<') && (next == '?')) {
9604 if ((!terminate) &&
9605 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9606 goto done;
9607#ifdef DEBUG_PUSH
9608 xmlGenericError(xmlGenericErrorContext,
9609 "PP: Parsing PI\n");
9610#endif
9611 xmlParsePI(ctxt);
9612 ctxt->instate = XML_PARSER_EPILOG;
9613 } else if ((cur == '<') && (next == '!') &&
9614 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9615 if ((!terminate) &&
9616 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9617 goto done;
9618#ifdef DEBUG_PUSH
9619 xmlGenericError(xmlGenericErrorContext,
9620 "PP: Parsing Comment\n");
9621#endif
9622 xmlParseComment(ctxt);
9623 ctxt->instate = XML_PARSER_EPILOG;
9624 } else if ((cur == '<') && (next == '!') &&
9625 (avail < 4)) {
9626 goto done;
9627 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009628 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009629 ctxt->instate = XML_PARSER_EOF;
9630#ifdef DEBUG_PUSH
9631 xmlGenericError(xmlGenericErrorContext,
9632 "PP: entering EOF\n");
9633#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009634 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009635 ctxt->sax->endDocument(ctxt->userData);
9636 goto done;
9637 }
9638 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009639 case XML_PARSER_DTD: {
9640 /*
9641 * Sorry but progressive parsing of the internal subset
9642 * is not expected to be supported. We first check that
9643 * the full content of the internal subset is available and
9644 * the parsing is launched only at that point.
9645 * Internal subset ends up with "']' S? '>'" in an unescaped
9646 * section and not in a ']]>' sequence which are conditional
9647 * sections (whoever argued to keep that crap in XML deserve
9648 * a place in hell !).
9649 */
9650 int base, i;
9651 xmlChar *buf;
9652 xmlChar quote = 0;
9653
9654 base = ctxt->input->cur - ctxt->input->base;
9655 if (base < 0) return(0);
9656 if (ctxt->checkIndex > base)
9657 base = ctxt->checkIndex;
9658 buf = ctxt->input->buf->buffer->content;
9659 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9660 base++) {
9661 if (quote != 0) {
9662 if (buf[base] == quote)
9663 quote = 0;
9664 continue;
9665 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009666 if ((quote == 0) && (buf[base] == '<')) {
9667 int found = 0;
9668 /* special handling of comments */
9669 if (((unsigned int) base + 4 <
9670 ctxt->input->buf->buffer->use) &&
9671 (buf[base + 1] == '!') &&
9672 (buf[base + 2] == '-') &&
9673 (buf[base + 3] == '-')) {
9674 for (;(unsigned int) base + 3 <
9675 ctxt->input->buf->buffer->use; base++) {
9676 if ((buf[base] == '-') &&
9677 (buf[base + 1] == '-') &&
9678 (buf[base + 2] == '>')) {
9679 found = 1;
9680 base += 2;
9681 break;
9682 }
9683 }
9684 if (!found)
9685 break;
9686 continue;
9687 }
9688 }
Owen Taylor3473f882001-02-23 17:55:21 +00009689 if (buf[base] == '"') {
9690 quote = '"';
9691 continue;
9692 }
9693 if (buf[base] == '\'') {
9694 quote = '\'';
9695 continue;
9696 }
9697 if (buf[base] == ']') {
9698 if ((unsigned int) base +1 >=
9699 ctxt->input->buf->buffer->use)
9700 break;
9701 if (buf[base + 1] == ']') {
9702 /* conditional crap, skip both ']' ! */
9703 base++;
9704 continue;
9705 }
9706 for (i = 0;
9707 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9708 i++) {
9709 if (buf[base + i] == '>')
9710 goto found_end_int_subset;
9711 }
9712 break;
9713 }
9714 }
9715 /*
9716 * We didn't found the end of the Internal subset
9717 */
9718 if (quote == 0)
9719 ctxt->checkIndex = base;
9720#ifdef DEBUG_PUSH
9721 if (next == 0)
9722 xmlGenericError(xmlGenericErrorContext,
9723 "PP: lookup of int subset end filed\n");
9724#endif
9725 goto done;
9726
9727found_end_int_subset:
9728 xmlParseInternalSubset(ctxt);
9729 ctxt->inSubset = 2;
9730 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9731 (ctxt->sax->externalSubset != NULL))
9732 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9733 ctxt->extSubSystem, ctxt->extSubURI);
9734 ctxt->inSubset = 0;
9735 ctxt->instate = XML_PARSER_PROLOG;
9736 ctxt->checkIndex = 0;
9737#ifdef DEBUG_PUSH
9738 xmlGenericError(xmlGenericErrorContext,
9739 "PP: entering PROLOG\n");
9740#endif
9741 break;
9742 }
9743 case XML_PARSER_COMMENT:
9744 xmlGenericError(xmlGenericErrorContext,
9745 "PP: internal error, state == COMMENT\n");
9746 ctxt->instate = XML_PARSER_CONTENT;
9747#ifdef DEBUG_PUSH
9748 xmlGenericError(xmlGenericErrorContext,
9749 "PP: entering CONTENT\n");
9750#endif
9751 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009752 case XML_PARSER_IGNORE:
9753 xmlGenericError(xmlGenericErrorContext,
9754 "PP: internal error, state == IGNORE");
9755 ctxt->instate = XML_PARSER_DTD;
9756#ifdef DEBUG_PUSH
9757 xmlGenericError(xmlGenericErrorContext,
9758 "PP: entering DTD\n");
9759#endif
9760 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009761 case XML_PARSER_PI:
9762 xmlGenericError(xmlGenericErrorContext,
9763 "PP: internal error, state == PI\n");
9764 ctxt->instate = XML_PARSER_CONTENT;
9765#ifdef DEBUG_PUSH
9766 xmlGenericError(xmlGenericErrorContext,
9767 "PP: entering CONTENT\n");
9768#endif
9769 break;
9770 case XML_PARSER_ENTITY_DECL:
9771 xmlGenericError(xmlGenericErrorContext,
9772 "PP: internal error, state == ENTITY_DECL\n");
9773 ctxt->instate = XML_PARSER_DTD;
9774#ifdef DEBUG_PUSH
9775 xmlGenericError(xmlGenericErrorContext,
9776 "PP: entering DTD\n");
9777#endif
9778 break;
9779 case XML_PARSER_ENTITY_VALUE:
9780 xmlGenericError(xmlGenericErrorContext,
9781 "PP: internal error, state == ENTITY_VALUE\n");
9782 ctxt->instate = XML_PARSER_CONTENT;
9783#ifdef DEBUG_PUSH
9784 xmlGenericError(xmlGenericErrorContext,
9785 "PP: entering DTD\n");
9786#endif
9787 break;
9788 case XML_PARSER_ATTRIBUTE_VALUE:
9789 xmlGenericError(xmlGenericErrorContext,
9790 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9791 ctxt->instate = XML_PARSER_START_TAG;
9792#ifdef DEBUG_PUSH
9793 xmlGenericError(xmlGenericErrorContext,
9794 "PP: entering START_TAG\n");
9795#endif
9796 break;
9797 case XML_PARSER_SYSTEM_LITERAL:
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: internal error, state == SYSTEM_LITERAL\n");
9800 ctxt->instate = XML_PARSER_START_TAG;
9801#ifdef DEBUG_PUSH
9802 xmlGenericError(xmlGenericErrorContext,
9803 "PP: entering START_TAG\n");
9804#endif
9805 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00009806 case XML_PARSER_PUBLIC_LITERAL:
9807 xmlGenericError(xmlGenericErrorContext,
9808 "PP: internal error, state == PUBLIC_LITERAL\n");
9809 ctxt->instate = XML_PARSER_START_TAG;
9810#ifdef DEBUG_PUSH
9811 xmlGenericError(xmlGenericErrorContext,
9812 "PP: entering START_TAG\n");
9813#endif
9814 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009815 }
9816 }
9817done:
9818#ifdef DEBUG_PUSH
9819 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
9820#endif
9821 return(ret);
9822}
9823
9824/**
Owen Taylor3473f882001-02-23 17:55:21 +00009825 * xmlParseChunk:
9826 * @ctxt: an XML parser context
9827 * @chunk: an char array
9828 * @size: the size in byte of the chunk
9829 * @terminate: last chunk indicator
9830 *
9831 * Parse a Chunk of memory
9832 *
9833 * Returns zero if no error, the xmlParserErrors otherwise.
9834 */
9835int
9836xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
9837 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009838 if (ctxt == NULL)
9839 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009840 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9841 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +00009842 if (ctxt->instate == XML_PARSER_START)
9843 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009844 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
9845 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
9846 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
9847 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +00009848 int res;
Owen Taylor3473f882001-02-23 17:55:21 +00009849
William M. Bracka3215c72004-07-31 16:24:01 +00009850 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
9851 if (res < 0) {
9852 ctxt->errNo = XML_PARSER_EOF;
9853 ctxt->disableSAX = 1;
9854 return (XML_PARSER_EOF);
9855 }
Owen Taylor3473f882001-02-23 17:55:21 +00009856 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9857 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009858 ctxt->input->end =
9859 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009860#ifdef DEBUG_PUSH
9861 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
9862#endif
9863
Owen Taylor3473f882001-02-23 17:55:21 +00009864 } else if (ctxt->instate != XML_PARSER_EOF) {
9865 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
9866 xmlParserInputBufferPtr in = ctxt->input->buf;
9867 if ((in->encoder != NULL) && (in->buffer != NULL) &&
9868 (in->raw != NULL)) {
9869 int nbchars;
9870
9871 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
9872 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009873 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +00009874 xmlGenericError(xmlGenericErrorContext,
9875 "xmlParseChunk: encoder error\n");
9876 return(XML_ERR_INVALID_ENCODING);
9877 }
9878 }
9879 }
9880 }
9881 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009882 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
9883 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +00009884 if (terminate) {
9885 /*
9886 * Check for termination
9887 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009888 int avail = 0;
9889
9890 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009891 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009892 avail = ctxt->input->length -
9893 (ctxt->input->cur - ctxt->input->base);
9894 else
9895 avail = ctxt->input->buf->buffer->use -
9896 (ctxt->input->cur - ctxt->input->base);
9897 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009898
Owen Taylor3473f882001-02-23 17:55:21 +00009899 if ((ctxt->instate != XML_PARSER_EOF) &&
9900 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009901 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009902 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009903 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009904 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +00009905 }
Owen Taylor3473f882001-02-23 17:55:21 +00009906 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009907 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009908 ctxt->sax->endDocument(ctxt->userData);
9909 }
9910 ctxt->instate = XML_PARSER_EOF;
9911 }
9912 return((xmlParserErrors) ctxt->errNo);
9913}
9914
9915/************************************************************************
9916 * *
9917 * I/O front end functions to the parser *
9918 * *
9919 ************************************************************************/
9920
9921/**
9922 * xmlStopParser:
9923 * @ctxt: an XML parser context
9924 *
9925 * Blocks further parser processing
9926 */
9927void
9928xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +00009929 if (ctxt == NULL)
9930 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009931 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +00009932 ctxt->disableSAX = 1;
William M. Brack230c5502004-12-20 16:18:49 +00009933 if (ctxt->input != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009934 ctxt->input->cur = BAD_CAST"";
William M. Brack230c5502004-12-20 16:18:49 +00009935 ctxt->input->base = ctxt->input->cur;
9936 }
Owen Taylor3473f882001-02-23 17:55:21 +00009937}
9938
9939/**
9940 * xmlCreatePushParserCtxt:
9941 * @sax: a SAX handler
9942 * @user_data: The user data returned on SAX callbacks
9943 * @chunk: a pointer to an array of chars
9944 * @size: number of chars in the array
9945 * @filename: an optional file name or URI
9946 *
Daniel Veillard176d99f2002-07-06 19:22:28 +00009947 * Create a parser context for using the XML parser in push mode.
9948 * If @buffer and @size are non-NULL, the data is used to detect
9949 * the encoding. The remaining characters will be parsed so they
9950 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +00009951 * To allow content encoding detection, @size should be >= 4
9952 * The value of @filename is used for fetching external entities
9953 * and error/warning reports.
9954 *
9955 * Returns the new parser context or NULL
9956 */
Daniel Veillard176d99f2002-07-06 19:22:28 +00009957
Owen Taylor3473f882001-02-23 17:55:21 +00009958xmlParserCtxtPtr
9959xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
9960 const char *chunk, int size, const char *filename) {
9961 xmlParserCtxtPtr ctxt;
9962 xmlParserInputPtr inputStream;
9963 xmlParserInputBufferPtr buf;
9964 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
9965
9966 /*
9967 * plug some encoding conversion routines
9968 */
9969 if ((chunk != NULL) && (size >= 4))
9970 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
9971
9972 buf = xmlAllocParserInputBuffer(enc);
9973 if (buf == NULL) return(NULL);
9974
9975 ctxt = xmlNewParserCtxt();
9976 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00009977 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009978 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009979 return(NULL);
9980 }
Daniel Veillard03a53c32004-10-26 16:06:51 +00009981 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009982 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
9983 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009984 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009985 xmlFreeParserInputBuffer(buf);
9986 xmlFreeParserCtxt(ctxt);
9987 return(NULL);
9988 }
Owen Taylor3473f882001-02-23 17:55:21 +00009989 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +00009990#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +00009991 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +00009992#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009993 xmlFree(ctxt->sax);
9994 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
9995 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009996 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009997 xmlFreeParserInputBuffer(buf);
9998 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009999 return(NULL);
10000 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010001 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10002 if (sax->initialized == XML_SAX2_MAGIC)
10003 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10004 else
10005 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010006 if (user_data != NULL)
10007 ctxt->userData = user_data;
10008 }
10009 if (filename == NULL) {
10010 ctxt->directory = NULL;
10011 } else {
10012 ctxt->directory = xmlParserGetDirectory(filename);
10013 }
10014
10015 inputStream = xmlNewInputStream(ctxt);
10016 if (inputStream == NULL) {
10017 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010018 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010019 return(NULL);
10020 }
10021
10022 if (filename == NULL)
10023 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010024 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010025 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010026 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010027 if (inputStream->filename == NULL) {
10028 xmlFreeParserCtxt(ctxt);
10029 xmlFreeParserInputBuffer(buf);
10030 return(NULL);
10031 }
10032 }
Owen Taylor3473f882001-02-23 17:55:21 +000010033 inputStream->buf = buf;
10034 inputStream->base = inputStream->buf->buffer->content;
10035 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010036 inputStream->end =
10037 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010038
10039 inputPush(ctxt, inputStream);
10040
10041 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10042 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010043 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10044 int cur = ctxt->input->cur - ctxt->input->base;
10045
Owen Taylor3473f882001-02-23 17:55:21 +000010046 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010047
10048 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10049 ctxt->input->cur = ctxt->input->base + cur;
10050 ctxt->input->end =
10051 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010052#ifdef DEBUG_PUSH
10053 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10054#endif
10055 }
10056
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010057 if (enc != XML_CHAR_ENCODING_NONE) {
10058 xmlSwitchEncoding(ctxt, enc);
10059 }
10060
Owen Taylor3473f882001-02-23 17:55:21 +000010061 return(ctxt);
10062}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010063#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010064
10065/**
10066 * xmlCreateIOParserCtxt:
10067 * @sax: a SAX handler
10068 * @user_data: The user data returned on SAX callbacks
10069 * @ioread: an I/O read function
10070 * @ioclose: an I/O close function
10071 * @ioctx: an I/O handler
10072 * @enc: the charset encoding if known
10073 *
10074 * Create a parser context for using the XML parser with an existing
10075 * I/O stream
10076 *
10077 * Returns the new parser context or NULL
10078 */
10079xmlParserCtxtPtr
10080xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10081 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10082 void *ioctx, xmlCharEncoding enc) {
10083 xmlParserCtxtPtr ctxt;
10084 xmlParserInputPtr inputStream;
10085 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010086
10087 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10090 if (buf == NULL) return(NULL);
10091
10092 ctxt = xmlNewParserCtxt();
10093 if (ctxt == NULL) {
10094 xmlFree(buf);
10095 return(NULL);
10096 }
10097 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010098#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010099 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010100#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010101 xmlFree(ctxt->sax);
10102 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10103 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010105 xmlFree(ctxt);
10106 return(NULL);
10107 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010108 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10109 if (sax->initialized == XML_SAX2_MAGIC)
10110 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10111 else
10112 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010113 if (user_data != NULL)
10114 ctxt->userData = user_data;
10115 }
10116
10117 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10118 if (inputStream == NULL) {
10119 xmlFreeParserCtxt(ctxt);
10120 return(NULL);
10121 }
10122 inputPush(ctxt, inputStream);
10123
10124 return(ctxt);
10125}
10126
Daniel Veillard4432df22003-09-28 18:58:27 +000010127#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010128/************************************************************************
10129 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010130 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010131 * *
10132 ************************************************************************/
10133
10134/**
10135 * xmlIOParseDTD:
10136 * @sax: the SAX handler block or NULL
10137 * @input: an Input Buffer
10138 * @enc: the charset encoding if known
10139 *
10140 * Load and parse a DTD
10141 *
10142 * Returns the resulting xmlDtdPtr or NULL in case of error.
10143 * @input will be freed at parsing end.
10144 */
10145
10146xmlDtdPtr
10147xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10148 xmlCharEncoding enc) {
10149 xmlDtdPtr ret = NULL;
10150 xmlParserCtxtPtr ctxt;
10151 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010152 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010153
10154 if (input == NULL)
10155 return(NULL);
10156
10157 ctxt = xmlNewParserCtxt();
10158 if (ctxt == NULL) {
10159 return(NULL);
10160 }
10161
10162 /*
10163 * Set-up the SAX context
10164 */
10165 if (sax != NULL) {
10166 if (ctxt->sax != NULL)
10167 xmlFree(ctxt->sax);
10168 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010169 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010170 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010171 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010172
10173 /*
10174 * generate a parser input from the I/O handler
10175 */
10176
Daniel Veillard43caefb2003-12-07 19:32:22 +000010177 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010178 if (pinput == NULL) {
10179 if (sax != NULL) ctxt->sax = NULL;
10180 xmlFreeParserCtxt(ctxt);
10181 return(NULL);
10182 }
10183
10184 /*
10185 * plug some encoding conversion routines here.
10186 */
10187 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010188 if (enc != XML_CHAR_ENCODING_NONE) {
10189 xmlSwitchEncoding(ctxt, enc);
10190 }
Owen Taylor3473f882001-02-23 17:55:21 +000010191
10192 pinput->filename = NULL;
10193 pinput->line = 1;
10194 pinput->col = 1;
10195 pinput->base = ctxt->input->cur;
10196 pinput->cur = ctxt->input->cur;
10197 pinput->free = NULL;
10198
10199 /*
10200 * let's parse that entity knowing it's an external subset.
10201 */
10202 ctxt->inSubset = 2;
10203 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10204 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10205 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010206
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010207 if ((enc == XML_CHAR_ENCODING_NONE) &&
10208 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010209 /*
10210 * Get the 4 first bytes and decode the charset
10211 * if enc != XML_CHAR_ENCODING_NONE
10212 * plug some encoding conversion routines.
10213 */
10214 start[0] = RAW;
10215 start[1] = NXT(1);
10216 start[2] = NXT(2);
10217 start[3] = NXT(3);
10218 enc = xmlDetectCharEncoding(start, 4);
10219 if (enc != XML_CHAR_ENCODING_NONE) {
10220 xmlSwitchEncoding(ctxt, enc);
10221 }
10222 }
10223
Owen Taylor3473f882001-02-23 17:55:21 +000010224 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10225
10226 if (ctxt->myDoc != NULL) {
10227 if (ctxt->wellFormed) {
10228 ret = ctxt->myDoc->extSubset;
10229 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010230 if (ret != NULL) {
10231 xmlNodePtr tmp;
10232
10233 ret->doc = NULL;
10234 tmp = ret->children;
10235 while (tmp != NULL) {
10236 tmp->doc = NULL;
10237 tmp = tmp->next;
10238 }
10239 }
Owen Taylor3473f882001-02-23 17:55:21 +000010240 } else {
10241 ret = NULL;
10242 }
10243 xmlFreeDoc(ctxt->myDoc);
10244 ctxt->myDoc = NULL;
10245 }
10246 if (sax != NULL) ctxt->sax = NULL;
10247 xmlFreeParserCtxt(ctxt);
10248
10249 return(ret);
10250}
10251
10252/**
10253 * xmlSAXParseDTD:
10254 * @sax: the SAX handler block
10255 * @ExternalID: a NAME* containing the External ID of the DTD
10256 * @SystemID: a NAME* containing the URL to the DTD
10257 *
10258 * Load and parse an external subset.
10259 *
10260 * Returns the resulting xmlDtdPtr or NULL in case of error.
10261 */
10262
10263xmlDtdPtr
10264xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10265 const xmlChar *SystemID) {
10266 xmlDtdPtr ret = NULL;
10267 xmlParserCtxtPtr ctxt;
10268 xmlParserInputPtr input = NULL;
10269 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010270 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010271
10272 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10273
10274 ctxt = xmlNewParserCtxt();
10275 if (ctxt == NULL) {
10276 return(NULL);
10277 }
10278
10279 /*
10280 * Set-up the SAX context
10281 */
10282 if (sax != NULL) {
10283 if (ctxt->sax != NULL)
10284 xmlFree(ctxt->sax);
10285 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010286 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010287 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010288
10289 /*
10290 * Canonicalise the system ID
10291 */
10292 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010293 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010294 xmlFreeParserCtxt(ctxt);
10295 return(NULL);
10296 }
Owen Taylor3473f882001-02-23 17:55:21 +000010297
10298 /*
10299 * Ask the Entity resolver to load the damn thing
10300 */
10301
10302 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010303 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010304 if (input == NULL) {
10305 if (sax != NULL) ctxt->sax = NULL;
10306 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010307 if (systemIdCanonic != NULL)
10308 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 return(NULL);
10310 }
10311
10312 /*
10313 * plug some encoding conversion routines here.
10314 */
10315 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010316 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10317 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10318 xmlSwitchEncoding(ctxt, enc);
10319 }
Owen Taylor3473f882001-02-23 17:55:21 +000010320
10321 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010322 input->filename = (char *) systemIdCanonic;
10323 else
10324 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010325 input->line = 1;
10326 input->col = 1;
10327 input->base = ctxt->input->cur;
10328 input->cur = ctxt->input->cur;
10329 input->free = NULL;
10330
10331 /*
10332 * let's parse that entity knowing it's an external subset.
10333 */
10334 ctxt->inSubset = 2;
10335 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10336 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10337 ExternalID, SystemID);
10338 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10339
10340 if (ctxt->myDoc != NULL) {
10341 if (ctxt->wellFormed) {
10342 ret = ctxt->myDoc->extSubset;
10343 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010344 if (ret != NULL) {
10345 xmlNodePtr tmp;
10346
10347 ret->doc = NULL;
10348 tmp = ret->children;
10349 while (tmp != NULL) {
10350 tmp->doc = NULL;
10351 tmp = tmp->next;
10352 }
10353 }
Owen Taylor3473f882001-02-23 17:55:21 +000010354 } else {
10355 ret = NULL;
10356 }
10357 xmlFreeDoc(ctxt->myDoc);
10358 ctxt->myDoc = NULL;
10359 }
10360 if (sax != NULL) ctxt->sax = NULL;
10361 xmlFreeParserCtxt(ctxt);
10362
10363 return(ret);
10364}
10365
Daniel Veillard4432df22003-09-28 18:58:27 +000010366
Owen Taylor3473f882001-02-23 17:55:21 +000010367/**
10368 * xmlParseDTD:
10369 * @ExternalID: a NAME* containing the External ID of the DTD
10370 * @SystemID: a NAME* containing the URL to the DTD
10371 *
10372 * Load and parse an external subset.
10373 *
10374 * Returns the resulting xmlDtdPtr or NULL in case of error.
10375 */
10376
10377xmlDtdPtr
10378xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10379 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10380}
Daniel Veillard4432df22003-09-28 18:58:27 +000010381#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010382
10383/************************************************************************
10384 * *
10385 * Front ends when parsing an Entity *
10386 * *
10387 ************************************************************************/
10388
10389/**
Owen Taylor3473f882001-02-23 17:55:21 +000010390 * xmlParseCtxtExternalEntity:
10391 * @ctx: the existing parsing context
10392 * @URL: the URL for the entity to load
10393 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010394 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010395 *
10396 * Parse an external general entity within an existing parsing context
10397 * An external general parsed entity is well-formed if it matches the
10398 * production labeled extParsedEnt.
10399 *
10400 * [78] extParsedEnt ::= TextDecl? content
10401 *
10402 * Returns 0 if the entity is well formed, -1 in case of args problem and
10403 * the parser error code otherwise
10404 */
10405
10406int
10407xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010408 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010409 xmlParserCtxtPtr ctxt;
10410 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010411 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010412 xmlSAXHandlerPtr oldsax = NULL;
10413 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010414 xmlChar start[4];
10415 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010416
Daniel Veillardce682bc2004-11-05 17:22:25 +000010417 if (ctx == NULL) return(-1);
10418
Owen Taylor3473f882001-02-23 17:55:21 +000010419 if (ctx->depth > 40) {
10420 return(XML_ERR_ENTITY_LOOP);
10421 }
10422
Daniel Veillardcda96922001-08-21 10:56:31 +000010423 if (lst != NULL)
10424 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010425 if ((URL == NULL) && (ID == NULL))
10426 return(-1);
10427 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10428 return(-1);
10429
10430
10431 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10432 if (ctxt == NULL) return(-1);
10433 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010434 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010435 oldsax = ctxt->sax;
10436 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010437 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010438 newDoc = xmlNewDoc(BAD_CAST "1.0");
10439 if (newDoc == NULL) {
10440 xmlFreeParserCtxt(ctxt);
10441 return(-1);
10442 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010443 if (ctx->myDoc->dict) {
10444 newDoc->dict = ctx->myDoc->dict;
10445 xmlDictReference(newDoc->dict);
10446 }
Owen Taylor3473f882001-02-23 17:55:21 +000010447 if (ctx->myDoc != NULL) {
10448 newDoc->intSubset = ctx->myDoc->intSubset;
10449 newDoc->extSubset = ctx->myDoc->extSubset;
10450 }
10451 if (ctx->myDoc->URL != NULL) {
10452 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10453 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010454 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10455 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010456 ctxt->sax = oldsax;
10457 xmlFreeParserCtxt(ctxt);
10458 newDoc->intSubset = NULL;
10459 newDoc->extSubset = NULL;
10460 xmlFreeDoc(newDoc);
10461 return(-1);
10462 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010463 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010464 nodePush(ctxt, newDoc->children);
10465 if (ctx->myDoc == NULL) {
10466 ctxt->myDoc = newDoc;
10467 } else {
10468 ctxt->myDoc = ctx->myDoc;
10469 newDoc->children->doc = ctx->myDoc;
10470 }
10471
Daniel Veillard87a764e2001-06-20 17:41:10 +000010472 /*
10473 * Get the 4 first bytes and decode the charset
10474 * if enc != XML_CHAR_ENCODING_NONE
10475 * plug some encoding conversion routines.
10476 */
10477 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010478 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10479 start[0] = RAW;
10480 start[1] = NXT(1);
10481 start[2] = NXT(2);
10482 start[3] = NXT(3);
10483 enc = xmlDetectCharEncoding(start, 4);
10484 if (enc != XML_CHAR_ENCODING_NONE) {
10485 xmlSwitchEncoding(ctxt, enc);
10486 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010487 }
10488
Owen Taylor3473f882001-02-23 17:55:21 +000010489 /*
10490 * Parse a possible text declaration first
10491 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010492 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010493 xmlParseTextDecl(ctxt);
10494 }
10495
10496 /*
10497 * Doing validity checking on chunk doesn't make sense
10498 */
10499 ctxt->instate = XML_PARSER_CONTENT;
10500 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010501 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010502 ctxt->loadsubset = ctx->loadsubset;
10503 ctxt->depth = ctx->depth + 1;
10504 ctxt->replaceEntities = ctx->replaceEntities;
10505 if (ctxt->validate) {
10506 ctxt->vctxt.error = ctx->vctxt.error;
10507 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010508 } else {
10509 ctxt->vctxt.error = NULL;
10510 ctxt->vctxt.warning = NULL;
10511 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010512 ctxt->vctxt.nodeTab = NULL;
10513 ctxt->vctxt.nodeNr = 0;
10514 ctxt->vctxt.nodeMax = 0;
10515 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010516 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10517 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010518 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10519 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10520 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010521 ctxt->dictNames = ctx->dictNames;
10522 ctxt->attsDefault = ctx->attsDefault;
10523 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010524 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010525
10526 xmlParseContent(ctxt);
10527
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010528 ctx->validate = ctxt->validate;
10529 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010530 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010531 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010532 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 }
10535 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010536 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010537 }
10538
10539 if (!ctxt->wellFormed) {
10540 if (ctxt->errNo == 0)
10541 ret = 1;
10542 else
10543 ret = ctxt->errNo;
10544 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010545 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010546 xmlNodePtr cur;
10547
10548 /*
10549 * Return the newly created nodeset after unlinking it from
10550 * they pseudo parent.
10551 */
10552 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010553 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010554 while (cur != NULL) {
10555 cur->parent = NULL;
10556 cur = cur->next;
10557 }
10558 newDoc->children->children = NULL;
10559 }
10560 ret = 0;
10561 }
10562 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010563 ctxt->dict = NULL;
10564 ctxt->attsDefault = NULL;
10565 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010566 xmlFreeParserCtxt(ctxt);
10567 newDoc->intSubset = NULL;
10568 newDoc->extSubset = NULL;
10569 xmlFreeDoc(newDoc);
10570
10571 return(ret);
10572}
10573
10574/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010575 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010576 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010577 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010578 * @sax: the SAX handler bloc (possibly NULL)
10579 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10580 * @depth: Used for loop detection, use 0
10581 * @URL: the URL for the entity to load
10582 * @ID: the System ID for the entity to load
10583 * @list: the return value for the set of parsed nodes
10584 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010585 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010586 *
10587 * Returns 0 if the entity is well formed, -1 in case of args problem and
10588 * the parser error code otherwise
10589 */
10590
Daniel Veillard7d515752003-09-26 19:12:37 +000010591static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010592xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10593 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010594 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010595 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010596 xmlParserCtxtPtr ctxt;
10597 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010598 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010599 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010600 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010601 xmlChar start[4];
10602 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010603
10604 if (depth > 40) {
10605 return(XML_ERR_ENTITY_LOOP);
10606 }
10607
10608
10609
10610 if (list != NULL)
10611 *list = NULL;
10612 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010613 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010614 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010615 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010616
10617
10618 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010619 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010620 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010621 if (oldctxt != NULL) {
10622 ctxt->_private = oldctxt->_private;
10623 ctxt->loadsubset = oldctxt->loadsubset;
10624 ctxt->validate = oldctxt->validate;
10625 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010626 ctxt->record_info = oldctxt->record_info;
10627 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10628 ctxt->node_seq.length = oldctxt->node_seq.length;
10629 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010630 } else {
10631 /*
10632 * Doing validity checking on chunk without context
10633 * doesn't make sense
10634 */
10635 ctxt->_private = NULL;
10636 ctxt->validate = 0;
10637 ctxt->external = 2;
10638 ctxt->loadsubset = 0;
10639 }
Owen Taylor3473f882001-02-23 17:55:21 +000010640 if (sax != NULL) {
10641 oldsax = ctxt->sax;
10642 ctxt->sax = sax;
10643 if (user_data != NULL)
10644 ctxt->userData = user_data;
10645 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010646 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010647 newDoc = xmlNewDoc(BAD_CAST "1.0");
10648 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010649 ctxt->node_seq.maximum = 0;
10650 ctxt->node_seq.length = 0;
10651 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010652 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010653 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010654 }
10655 if (doc != NULL) {
10656 newDoc->intSubset = doc->intSubset;
10657 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010658 newDoc->dict = doc->dict;
10659 } else if (oldctxt != NULL) {
10660 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010661 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010662 xmlDictReference(newDoc->dict);
10663
Owen Taylor3473f882001-02-23 17:55:21 +000010664 if (doc->URL != NULL) {
10665 newDoc->URL = xmlStrdup(doc->URL);
10666 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010667 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10668 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010669 if (sax != NULL)
10670 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010671 ctxt->node_seq.maximum = 0;
10672 ctxt->node_seq.length = 0;
10673 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010674 xmlFreeParserCtxt(ctxt);
10675 newDoc->intSubset = NULL;
10676 newDoc->extSubset = NULL;
10677 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010678 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010679 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010680 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010681 nodePush(ctxt, newDoc->children);
10682 if (doc == NULL) {
10683 ctxt->myDoc = newDoc;
10684 } else {
10685 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010686 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010687 }
10688
Daniel Veillard87a764e2001-06-20 17:41:10 +000010689 /*
10690 * Get the 4 first bytes and decode the charset
10691 * if enc != XML_CHAR_ENCODING_NONE
10692 * plug some encoding conversion routines.
10693 */
10694 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010695 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10696 start[0] = RAW;
10697 start[1] = NXT(1);
10698 start[2] = NXT(2);
10699 start[3] = NXT(3);
10700 enc = xmlDetectCharEncoding(start, 4);
10701 if (enc != XML_CHAR_ENCODING_NONE) {
10702 xmlSwitchEncoding(ctxt, enc);
10703 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010704 }
10705
Owen Taylor3473f882001-02-23 17:55:21 +000010706 /*
10707 * Parse a possible text declaration first
10708 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010709 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010710 xmlParseTextDecl(ctxt);
10711 }
10712
Owen Taylor3473f882001-02-23 17:55:21 +000010713 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010714 ctxt->depth = depth;
10715
10716 xmlParseContent(ctxt);
10717
Daniel Veillard561b7f82002-03-20 21:55:57 +000010718 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010719 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010720 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010721 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010722 }
10723 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010724 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010725 }
10726
10727 if (!ctxt->wellFormed) {
10728 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010729 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010730 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010731 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010732 } else {
10733 if (list != NULL) {
10734 xmlNodePtr cur;
10735
10736 /*
10737 * Return the newly created nodeset after unlinking it from
10738 * they pseudo parent.
10739 */
10740 cur = newDoc->children->children;
10741 *list = cur;
10742 while (cur != NULL) {
10743 cur->parent = NULL;
10744 cur = cur->next;
10745 }
10746 newDoc->children->children = NULL;
10747 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010748 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010749 }
10750 if (sax != NULL)
10751 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010752 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10753 oldctxt->node_seq.length = ctxt->node_seq.length;
10754 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010755 ctxt->node_seq.maximum = 0;
10756 ctxt->node_seq.length = 0;
10757 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010758 xmlFreeParserCtxt(ctxt);
10759 newDoc->intSubset = NULL;
10760 newDoc->extSubset = NULL;
10761 xmlFreeDoc(newDoc);
10762
10763 return(ret);
10764}
10765
Daniel Veillard81273902003-09-30 00:43:48 +000010766#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010767/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010768 * xmlParseExternalEntity:
10769 * @doc: the document the chunk pertains to
10770 * @sax: the SAX handler bloc (possibly NULL)
10771 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10772 * @depth: Used for loop detection, use 0
10773 * @URL: the URL for the entity to load
10774 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010775 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010776 *
10777 * Parse an external general entity
10778 * An external general parsed entity is well-formed if it matches the
10779 * production labeled extParsedEnt.
10780 *
10781 * [78] extParsedEnt ::= TextDecl? content
10782 *
10783 * Returns 0 if the entity is well formed, -1 in case of args problem and
10784 * the parser error code otherwise
10785 */
10786
10787int
10788xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010789 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010790 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010791 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010792}
10793
10794/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000010795 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000010796 * @doc: the document the chunk pertains to
10797 * @sax: the SAX handler bloc (possibly NULL)
10798 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10799 * @depth: Used for loop detection, use 0
10800 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000010801 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010802 *
10803 * Parse a well-balanced chunk of an XML document
10804 * called by the parser
10805 * The allowed sequence for the Well Balanced Chunk is the one defined by
10806 * the content production in the XML grammar:
10807 *
10808 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10809 *
10810 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
10811 * the parser error code otherwise
10812 */
10813
10814int
10815xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000010816 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000010817 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
10818 depth, string, lst, 0 );
10819}
Daniel Veillard81273902003-09-30 00:43:48 +000010820#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000010821
10822/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000010823 * xmlParseBalancedChunkMemoryInternal:
10824 * @oldctxt: the existing parsing context
10825 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
10826 * @user_data: the user data field for the parser context
10827 * @lst: the return value for the set of parsed nodes
10828 *
10829 *
10830 * Parse a well-balanced chunk of an XML document
10831 * called by the parser
10832 * The allowed sequence for the Well Balanced Chunk is the one defined by
10833 * the content production in the XML grammar:
10834 *
10835 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10836 *
Daniel Veillard7d515752003-09-26 19:12:37 +000010837 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
10838 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000010839 *
10840 * In case recover is set to 1, the nodelist will not be empty even if
10841 * the parsed chunk is not well balanced.
10842 */
Daniel Veillard7d515752003-09-26 19:12:37 +000010843static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000010844xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
10845 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
10846 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010847 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010848 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010849 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010850 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010851 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010852 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000010853 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010854
10855 if (oldctxt->depth > 40) {
10856 return(XML_ERR_ENTITY_LOOP);
10857 }
10858
10859
10860 if (lst != NULL)
10861 *lst = NULL;
10862 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000010863 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010864
10865 size = xmlStrlen(string);
10866
10867 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000010868 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010869 if (user_data != NULL)
10870 ctxt->userData = user_data;
10871 else
10872 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010873 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10874 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010875 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10876 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10877 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010878
10879 oldsax = ctxt->sax;
10880 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010881 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000010882 ctxt->replaceEntities = oldctxt->replaceEntities;
10883 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010884
Daniel Veillarde1ca5032002-12-09 14:13:43 +000010885 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010886 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010887 newDoc = xmlNewDoc(BAD_CAST "1.0");
10888 if (newDoc == NULL) {
10889 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010890 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010891 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000010892 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010893 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010894 newDoc->dict = ctxt->dict;
10895 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010896 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010897 } else {
10898 ctxt->myDoc = oldctxt->myDoc;
10899 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010900 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010901 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010902 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
10903 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010904 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010905 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000010906 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010907 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010908 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010909 }
William M. Brack7b9154b2003-09-27 19:23:50 +000010910 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010911 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010912 ctxt->myDoc->children = NULL;
10913 ctxt->myDoc->last = NULL;
10914 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000010915 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010916 ctxt->instate = XML_PARSER_CONTENT;
10917 ctxt->depth = oldctxt->depth + 1;
10918
Daniel Veillard328f48c2002-11-15 15:24:34 +000010919 ctxt->validate = 0;
10920 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000010921 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
10922 /*
10923 * ID/IDREF registration will be done in xmlValidateElement below
10924 */
10925 ctxt->loadsubset |= XML_SKIP_IDS;
10926 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010927 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010928 ctxt->attsDefault = oldctxt->attsDefault;
10929 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010930
Daniel Veillard68e9e742002-11-16 15:35:11 +000010931 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010932 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010933 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010934 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010935 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010936 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010937 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010938 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000010939 }
10940
10941 if (!ctxt->wellFormed) {
10942 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010943 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010944 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010945 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010946 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000010947 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010948 }
10949
William M. Brack7b9154b2003-09-27 19:23:50 +000010950 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000010951 xmlNodePtr cur;
10952
10953 /*
10954 * Return the newly created nodeset after unlinking it from
10955 * they pseudo parent.
10956 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000010957 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010958 *lst = cur;
10959 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000010960#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000010961 if (oldctxt->validate && oldctxt->wellFormed &&
10962 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
10963 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
10964 oldctxt->myDoc, cur);
10965 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010966#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000010967 cur->parent = NULL;
10968 cur = cur->next;
10969 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000010970 ctxt->myDoc->children->children = NULL;
10971 }
10972 if (ctxt->myDoc != NULL) {
10973 xmlFreeNode(ctxt->myDoc->children);
10974 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010975 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010976 }
10977
10978 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000010979 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000010980 ctxt->attsDefault = NULL;
10981 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000010982 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010983 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000010984 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000010985 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000010986
10987 return(ret);
10988}
10989
Daniel Veillard29b17482004-08-16 00:39:03 +000010990/**
10991 * xmlParseInNodeContext:
10992 * @node: the context node
10993 * @data: the input string
10994 * @datalen: the input string length in bytes
10995 * @options: a combination of xmlParserOption
10996 * @lst: the return value for the set of parsed nodes
10997 *
10998 * Parse a well-balanced chunk of an XML document
10999 * within the context (DTD, namespaces, etc ...) of the given node.
11000 *
11001 * The allowed sequence for the data is a Well Balanced Chunk defined by
11002 * the content production in the XML grammar:
11003 *
11004 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11005 *
11006 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11007 * error code otherwise
11008 */
11009xmlParserErrors
11010xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11011 int options, xmlNodePtr *lst) {
11012#ifdef SAX2
11013 xmlParserCtxtPtr ctxt;
11014 xmlDocPtr doc = NULL;
11015 xmlNodePtr fake, cur;
11016 int nsnr = 0;
11017
11018 xmlParserErrors ret = XML_ERR_OK;
11019
11020 /*
11021 * check all input parameters, grab the document
11022 */
11023 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11024 return(XML_ERR_INTERNAL_ERROR);
11025 switch (node->type) {
11026 case XML_ELEMENT_NODE:
11027 case XML_ATTRIBUTE_NODE:
11028 case XML_TEXT_NODE:
11029 case XML_CDATA_SECTION_NODE:
11030 case XML_ENTITY_REF_NODE:
11031 case XML_PI_NODE:
11032 case XML_COMMENT_NODE:
11033 case XML_DOCUMENT_NODE:
11034 case XML_HTML_DOCUMENT_NODE:
11035 break;
11036 default:
11037 return(XML_ERR_INTERNAL_ERROR);
11038
11039 }
11040 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11041 (node->type != XML_DOCUMENT_NODE) &&
11042 (node->type != XML_HTML_DOCUMENT_NODE))
11043 node = node->parent;
11044 if (node == NULL)
11045 return(XML_ERR_INTERNAL_ERROR);
11046 if (node->type == XML_ELEMENT_NODE)
11047 doc = node->doc;
11048 else
11049 doc = (xmlDocPtr) node;
11050 if (doc == NULL)
11051 return(XML_ERR_INTERNAL_ERROR);
11052
11053 /*
11054 * allocate a context and set-up everything not related to the
11055 * node position in the tree
11056 */
11057 if (doc->type == XML_DOCUMENT_NODE)
11058 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11059#ifdef LIBXML_HTML_ENABLED
11060 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11061 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11062#endif
11063 else
11064 return(XML_ERR_INTERNAL_ERROR);
11065
11066 if (ctxt == NULL)
11067 return(XML_ERR_NO_MEMORY);
11068 fake = xmlNewComment(NULL);
11069 if (fake == NULL) {
11070 xmlFreeParserCtxt(ctxt);
11071 return(XML_ERR_NO_MEMORY);
11072 }
11073 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011074
11075 /*
11076 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11077 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11078 * we must wait until the last moment to free the original one.
11079 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011080 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011081 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011082 xmlDictFree(ctxt->dict);
11083 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011084 } else
11085 options |= XML_PARSE_NODICT;
11086
11087 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011088 xmlDetectSAX2(ctxt);
11089 ctxt->myDoc = doc;
11090
11091 if (node->type == XML_ELEMENT_NODE) {
11092 nodePush(ctxt, node);
11093 /*
11094 * initialize the SAX2 namespaces stack
11095 */
11096 cur = node;
11097 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11098 xmlNsPtr ns = cur->nsDef;
11099 const xmlChar *iprefix, *ihref;
11100
11101 while (ns != NULL) {
11102 if (ctxt->dict) {
11103 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11104 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11105 } else {
11106 iprefix = ns->prefix;
11107 ihref = ns->href;
11108 }
11109
11110 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11111 nsPush(ctxt, iprefix, ihref);
11112 nsnr++;
11113 }
11114 ns = ns->next;
11115 }
11116 cur = cur->parent;
11117 }
11118 ctxt->instate = XML_PARSER_CONTENT;
11119 }
11120
11121 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11122 /*
11123 * ID/IDREF registration will be done in xmlValidateElement below
11124 */
11125 ctxt->loadsubset |= XML_SKIP_IDS;
11126 }
11127
11128 xmlParseContent(ctxt);
11129 nsPop(ctxt, nsnr);
11130 if ((RAW == '<') && (NXT(1) == '/')) {
11131 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11132 } else if (RAW != 0) {
11133 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11134 }
11135 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11136 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11137 ctxt->wellFormed = 0;
11138 }
11139
11140 if (!ctxt->wellFormed) {
11141 if (ctxt->errNo == 0)
11142 ret = XML_ERR_INTERNAL_ERROR;
11143 else
11144 ret = (xmlParserErrors)ctxt->errNo;
11145 } else {
11146 ret = XML_ERR_OK;
11147 }
11148
11149 /*
11150 * Return the newly created nodeset after unlinking it from
11151 * the pseudo sibling.
11152 */
11153
11154 cur = fake->next;
11155 fake->next = NULL;
11156 node->last = fake;
11157
11158 if (cur != NULL) {
11159 cur->prev = NULL;
11160 }
11161
11162 *lst = cur;
11163
11164 while (cur != NULL) {
11165 cur->parent = NULL;
11166 cur = cur->next;
11167 }
11168
11169 xmlUnlinkNode(fake);
11170 xmlFreeNode(fake);
11171
11172
11173 if (ret != XML_ERR_OK) {
11174 xmlFreeNodeList(*lst);
11175 *lst = NULL;
11176 }
William M. Brackc3f81342004-10-03 01:22:44 +000011177
William M. Brackb7b54de2004-10-06 16:38:01 +000011178 if (doc->dict != NULL)
11179 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011180 xmlFreeParserCtxt(ctxt);
11181
11182 return(ret);
11183#else /* !SAX2 */
11184 return(XML_ERR_INTERNAL_ERROR);
11185#endif
11186}
11187
Daniel Veillard81273902003-09-30 00:43:48 +000011188#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011189/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011190 * xmlParseBalancedChunkMemoryRecover:
11191 * @doc: the document the chunk pertains to
11192 * @sax: the SAX handler bloc (possibly NULL)
11193 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11194 * @depth: Used for loop detection, use 0
11195 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11196 * @lst: the return value for the set of parsed nodes
11197 * @recover: return nodes even if the data is broken (use 0)
11198 *
11199 *
11200 * Parse a well-balanced chunk of an XML document
11201 * called by the parser
11202 * The allowed sequence for the Well Balanced Chunk is the one defined by
11203 * the content production in the XML grammar:
11204 *
11205 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11206 *
11207 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11208 * the parser error code otherwise
11209 *
11210 * In case recover is set to 1, the nodelist will not be empty even if
11211 * the parsed chunk is not well balanced.
11212 */
11213int
11214xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11215 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11216 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011217 xmlParserCtxtPtr ctxt;
11218 xmlDocPtr newDoc;
11219 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011220 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011221 int size;
11222 int ret = 0;
11223
11224 if (depth > 40) {
11225 return(XML_ERR_ENTITY_LOOP);
11226 }
11227
11228
Daniel Veillardcda96922001-08-21 10:56:31 +000011229 if (lst != NULL)
11230 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011231 if (string == NULL)
11232 return(-1);
11233
11234 size = xmlStrlen(string);
11235
11236 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11237 if (ctxt == NULL) return(-1);
11238 ctxt->userData = ctxt;
11239 if (sax != NULL) {
11240 oldsax = ctxt->sax;
11241 ctxt->sax = sax;
11242 if (user_data != NULL)
11243 ctxt->userData = user_data;
11244 }
11245 newDoc = xmlNewDoc(BAD_CAST "1.0");
11246 if (newDoc == NULL) {
11247 xmlFreeParserCtxt(ctxt);
11248 return(-1);
11249 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011250 if ((doc != NULL) && (doc->dict != NULL)) {
11251 xmlDictFree(ctxt->dict);
11252 ctxt->dict = doc->dict;
11253 xmlDictReference(ctxt->dict);
11254 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11255 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11256 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11257 ctxt->dictNames = 1;
11258 } else {
11259 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11260 }
Owen Taylor3473f882001-02-23 17:55:21 +000011261 if (doc != NULL) {
11262 newDoc->intSubset = doc->intSubset;
11263 newDoc->extSubset = doc->extSubset;
11264 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011265 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11266 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011267 if (sax != NULL)
11268 ctxt->sax = oldsax;
11269 xmlFreeParserCtxt(ctxt);
11270 newDoc->intSubset = NULL;
11271 newDoc->extSubset = NULL;
11272 xmlFreeDoc(newDoc);
11273 return(-1);
11274 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011275 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11276 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011277 if (doc == NULL) {
11278 ctxt->myDoc = newDoc;
11279 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011280 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011281 newDoc->children->doc = doc;
11282 }
11283 ctxt->instate = XML_PARSER_CONTENT;
11284 ctxt->depth = depth;
11285
11286 /*
11287 * Doing validity checking on chunk doesn't make sense
11288 */
11289 ctxt->validate = 0;
11290 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011291 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011292
Daniel Veillardb39bc392002-10-26 19:29:51 +000011293 if ( doc != NULL ){
11294 content = doc->children;
11295 doc->children = NULL;
11296 xmlParseContent(ctxt);
11297 doc->children = content;
11298 }
11299 else {
11300 xmlParseContent(ctxt);
11301 }
Owen Taylor3473f882001-02-23 17:55:21 +000011302 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011303 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011304 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011305 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011306 }
11307 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011308 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011309 }
11310
11311 if (!ctxt->wellFormed) {
11312 if (ctxt->errNo == 0)
11313 ret = 1;
11314 else
11315 ret = ctxt->errNo;
11316 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011317 ret = 0;
11318 }
11319
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011320 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11321 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011322
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011323 /*
11324 * Return the newly created nodeset after unlinking it from
11325 * they pseudo parent.
11326 */
11327 cur = newDoc->children->children;
11328 *lst = cur;
11329 while (cur != NULL) {
11330 xmlSetTreeDoc(cur, doc);
11331 cur->parent = NULL;
11332 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011333 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011334 newDoc->children->children = NULL;
11335 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011336
Owen Taylor3473f882001-02-23 17:55:21 +000011337 if (sax != NULL)
11338 ctxt->sax = oldsax;
11339 xmlFreeParserCtxt(ctxt);
11340 newDoc->intSubset = NULL;
11341 newDoc->extSubset = NULL;
11342 xmlFreeDoc(newDoc);
11343
11344 return(ret);
11345}
11346
11347/**
11348 * xmlSAXParseEntity:
11349 * @sax: the SAX handler block
11350 * @filename: the filename
11351 *
11352 * parse an XML external entity out of context and build a tree.
11353 * It use the given SAX function block to handle the parsing callback.
11354 * If sax is NULL, fallback to the default DOM tree building routines.
11355 *
11356 * [78] extParsedEnt ::= TextDecl? content
11357 *
11358 * This correspond to a "Well Balanced" chunk
11359 *
11360 * Returns the resulting document tree
11361 */
11362
11363xmlDocPtr
11364xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11365 xmlDocPtr ret;
11366 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011367
11368 ctxt = xmlCreateFileParserCtxt(filename);
11369 if (ctxt == NULL) {
11370 return(NULL);
11371 }
11372 if (sax != NULL) {
11373 if (ctxt->sax != NULL)
11374 xmlFree(ctxt->sax);
11375 ctxt->sax = sax;
11376 ctxt->userData = NULL;
11377 }
11378
Owen Taylor3473f882001-02-23 17:55:21 +000011379 xmlParseExtParsedEnt(ctxt);
11380
11381 if (ctxt->wellFormed)
11382 ret = ctxt->myDoc;
11383 else {
11384 ret = NULL;
11385 xmlFreeDoc(ctxt->myDoc);
11386 ctxt->myDoc = NULL;
11387 }
11388 if (sax != NULL)
11389 ctxt->sax = NULL;
11390 xmlFreeParserCtxt(ctxt);
11391
11392 return(ret);
11393}
11394
11395/**
11396 * xmlParseEntity:
11397 * @filename: the filename
11398 *
11399 * parse an XML external entity out of context and build a tree.
11400 *
11401 * [78] extParsedEnt ::= TextDecl? content
11402 *
11403 * This correspond to a "Well Balanced" chunk
11404 *
11405 * Returns the resulting document tree
11406 */
11407
11408xmlDocPtr
11409xmlParseEntity(const char *filename) {
11410 return(xmlSAXParseEntity(NULL, filename));
11411}
Daniel Veillard81273902003-09-30 00:43:48 +000011412#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011413
11414/**
11415 * xmlCreateEntityParserCtxt:
11416 * @URL: the entity URL
11417 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011418 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011419 *
11420 * Create a parser context for an external entity
11421 * Automatic support for ZLIB/Compress compressed document is provided
11422 * by default if found at compile-time.
11423 *
11424 * Returns the new parser context or NULL
11425 */
11426xmlParserCtxtPtr
11427xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11428 const xmlChar *base) {
11429 xmlParserCtxtPtr ctxt;
11430 xmlParserInputPtr inputStream;
11431 char *directory = NULL;
11432 xmlChar *uri;
11433
11434 ctxt = xmlNewParserCtxt();
11435 if (ctxt == NULL) {
11436 return(NULL);
11437 }
11438
11439 uri = xmlBuildURI(URL, base);
11440
11441 if (uri == NULL) {
11442 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11443 if (inputStream == NULL) {
11444 xmlFreeParserCtxt(ctxt);
11445 return(NULL);
11446 }
11447
11448 inputPush(ctxt, inputStream);
11449
11450 if ((ctxt->directory == NULL) && (directory == NULL))
11451 directory = xmlParserGetDirectory((char *)URL);
11452 if ((ctxt->directory == NULL) && (directory != NULL))
11453 ctxt->directory = directory;
11454 } else {
11455 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11456 if (inputStream == NULL) {
11457 xmlFree(uri);
11458 xmlFreeParserCtxt(ctxt);
11459 return(NULL);
11460 }
11461
11462 inputPush(ctxt, inputStream);
11463
11464 if ((ctxt->directory == NULL) && (directory == NULL))
11465 directory = xmlParserGetDirectory((char *)uri);
11466 if ((ctxt->directory == NULL) && (directory != NULL))
11467 ctxt->directory = directory;
11468 xmlFree(uri);
11469 }
Owen Taylor3473f882001-02-23 17:55:21 +000011470 return(ctxt);
11471}
11472
11473/************************************************************************
11474 * *
11475 * Front ends when parsing from a file *
11476 * *
11477 ************************************************************************/
11478
11479/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011480 * xmlCreateURLParserCtxt:
11481 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011482 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011483 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011484 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011485 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011486 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011487 *
11488 * Returns the new parser context or NULL
11489 */
11490xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011491xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011492{
11493 xmlParserCtxtPtr ctxt;
11494 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011495 char *directory = NULL;
11496
Owen Taylor3473f882001-02-23 17:55:21 +000011497 ctxt = xmlNewParserCtxt();
11498 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011499 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011500 return(NULL);
11501 }
11502
Daniel Veillarddf292f72005-01-16 19:00:15 +000011503 if (options)
11504 xmlCtxtUseOptions(ctxt, options);
11505 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011506
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011507 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011508 if (inputStream == NULL) {
11509 xmlFreeParserCtxt(ctxt);
11510 return(NULL);
11511 }
11512
Owen Taylor3473f882001-02-23 17:55:21 +000011513 inputPush(ctxt, inputStream);
11514 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011515 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011516 if ((ctxt->directory == NULL) && (directory != NULL))
11517 ctxt->directory = directory;
11518
11519 return(ctxt);
11520}
11521
Daniel Veillard61b93382003-11-03 14:28:31 +000011522/**
11523 * xmlCreateFileParserCtxt:
11524 * @filename: the filename
11525 *
11526 * Create a parser context for a file content.
11527 * Automatic support for ZLIB/Compress compressed document is provided
11528 * by default if found at compile-time.
11529 *
11530 * Returns the new parser context or NULL
11531 */
11532xmlParserCtxtPtr
11533xmlCreateFileParserCtxt(const char *filename)
11534{
11535 return(xmlCreateURLParserCtxt(filename, 0));
11536}
11537
Daniel Veillard81273902003-09-30 00:43:48 +000011538#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011539/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011540 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011541 * @sax: the SAX handler block
11542 * @filename: the filename
11543 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11544 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011545 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011546 *
11547 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11548 * compressed document is provided by default if found at compile-time.
11549 * It use the given SAX function block to handle the parsing callback.
11550 * If sax is NULL, fallback to the default DOM tree building routines.
11551 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011552 * User data (void *) is stored within the parser context in the
11553 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011554 *
Owen Taylor3473f882001-02-23 17:55:21 +000011555 * Returns the resulting document tree
11556 */
11557
11558xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011559xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11560 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011561 xmlDocPtr ret;
11562 xmlParserCtxtPtr ctxt;
11563 char *directory = NULL;
11564
Daniel Veillard635ef722001-10-29 11:48:19 +000011565 xmlInitParser();
11566
Owen Taylor3473f882001-02-23 17:55:21 +000011567 ctxt = xmlCreateFileParserCtxt(filename);
11568 if (ctxt == NULL) {
11569 return(NULL);
11570 }
11571 if (sax != NULL) {
11572 if (ctxt->sax != NULL)
11573 xmlFree(ctxt->sax);
11574 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011575 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011576 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011577 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011578 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011579 }
Owen Taylor3473f882001-02-23 17:55:21 +000011580
11581 if ((ctxt->directory == NULL) && (directory == NULL))
11582 directory = xmlParserGetDirectory(filename);
11583 if ((ctxt->directory == NULL) && (directory != NULL))
11584 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11585
Daniel Veillarddad3f682002-11-17 16:47:27 +000011586 ctxt->recovery = recovery;
11587
Owen Taylor3473f882001-02-23 17:55:21 +000011588 xmlParseDocument(ctxt);
11589
William M. Brackc07329e2003-09-08 01:57:30 +000011590 if ((ctxt->wellFormed) || recovery) {
11591 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011592 if (ret != NULL) {
11593 if (ctxt->input->buf->compressed > 0)
11594 ret->compression = 9;
11595 else
11596 ret->compression = ctxt->input->buf->compressed;
11597 }
William M. Brackc07329e2003-09-08 01:57:30 +000011598 }
Owen Taylor3473f882001-02-23 17:55:21 +000011599 else {
11600 ret = NULL;
11601 xmlFreeDoc(ctxt->myDoc);
11602 ctxt->myDoc = NULL;
11603 }
11604 if (sax != NULL)
11605 ctxt->sax = NULL;
11606 xmlFreeParserCtxt(ctxt);
11607
11608 return(ret);
11609}
11610
11611/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011612 * xmlSAXParseFile:
11613 * @sax: the SAX handler block
11614 * @filename: the filename
11615 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11616 * documents
11617 *
11618 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11619 * compressed document is provided by default if found at compile-time.
11620 * It use the given SAX function block to handle the parsing callback.
11621 * If sax is NULL, fallback to the default DOM tree building routines.
11622 *
11623 * Returns the resulting document tree
11624 */
11625
11626xmlDocPtr
11627xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11628 int recovery) {
11629 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11630}
11631
11632/**
Owen Taylor3473f882001-02-23 17:55:21 +000011633 * xmlRecoverDoc:
11634 * @cur: a pointer to an array of xmlChar
11635 *
11636 * parse an XML in-memory document and build a tree.
11637 * In the case the document is not Well Formed, a tree is built anyway
11638 *
11639 * Returns the resulting document tree
11640 */
11641
11642xmlDocPtr
11643xmlRecoverDoc(xmlChar *cur) {
11644 return(xmlSAXParseDoc(NULL, cur, 1));
11645}
11646
11647/**
11648 * xmlParseFile:
11649 * @filename: the filename
11650 *
11651 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11652 * compressed document is provided by default if found at compile-time.
11653 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011654 * Returns the resulting document tree if the file was wellformed,
11655 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011656 */
11657
11658xmlDocPtr
11659xmlParseFile(const char *filename) {
11660 return(xmlSAXParseFile(NULL, filename, 0));
11661}
11662
11663/**
11664 * xmlRecoverFile:
11665 * @filename: the filename
11666 *
11667 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11668 * compressed document is provided by default if found at compile-time.
11669 * In the case the document is not Well Formed, a tree is built anyway
11670 *
11671 * Returns the resulting document tree
11672 */
11673
11674xmlDocPtr
11675xmlRecoverFile(const char *filename) {
11676 return(xmlSAXParseFile(NULL, filename, 1));
11677}
11678
11679
11680/**
11681 * xmlSetupParserForBuffer:
11682 * @ctxt: an XML parser context
11683 * @buffer: a xmlChar * buffer
11684 * @filename: a file name
11685 *
11686 * Setup the parser context to parse a new buffer; Clears any prior
11687 * contents from the parser context. The buffer parameter must not be
11688 * NULL, but the filename parameter can be
11689 */
11690void
11691xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11692 const char* filename)
11693{
11694 xmlParserInputPtr input;
11695
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011696 if ((ctxt == NULL) || (buffer == NULL))
11697 return;
11698
Owen Taylor3473f882001-02-23 17:55:21 +000011699 input = xmlNewInputStream(ctxt);
11700 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011701 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011702 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011703 return;
11704 }
11705
11706 xmlClearParserCtxt(ctxt);
11707 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011708 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011709 input->base = buffer;
11710 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011711 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011712 inputPush(ctxt, input);
11713}
11714
11715/**
11716 * xmlSAXUserParseFile:
11717 * @sax: a SAX handler
11718 * @user_data: The user data returned on SAX callbacks
11719 * @filename: a file name
11720 *
11721 * parse an XML file and call the given SAX handler routines.
11722 * Automatic support for ZLIB/Compress compressed document is provided
11723 *
11724 * Returns 0 in case of success or a error number otherwise
11725 */
11726int
11727xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11728 const char *filename) {
11729 int ret = 0;
11730 xmlParserCtxtPtr ctxt;
11731
11732 ctxt = xmlCreateFileParserCtxt(filename);
11733 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011734#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011735 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011736#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011737 xmlFree(ctxt->sax);
11738 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011739 xmlDetectSAX2(ctxt);
11740
Owen Taylor3473f882001-02-23 17:55:21 +000011741 if (user_data != NULL)
11742 ctxt->userData = user_data;
11743
11744 xmlParseDocument(ctxt);
11745
11746 if (ctxt->wellFormed)
11747 ret = 0;
11748 else {
11749 if (ctxt->errNo != 0)
11750 ret = ctxt->errNo;
11751 else
11752 ret = -1;
11753 }
11754 if (sax != NULL)
11755 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000011756 if (ctxt->myDoc != NULL) {
11757 xmlFreeDoc(ctxt->myDoc);
11758 ctxt->myDoc = NULL;
11759 }
Owen Taylor3473f882001-02-23 17:55:21 +000011760 xmlFreeParserCtxt(ctxt);
11761
11762 return ret;
11763}
Daniel Veillard81273902003-09-30 00:43:48 +000011764#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011765
11766/************************************************************************
11767 * *
11768 * Front ends when parsing from memory *
11769 * *
11770 ************************************************************************/
11771
11772/**
11773 * xmlCreateMemoryParserCtxt:
11774 * @buffer: a pointer to a char array
11775 * @size: the size of the array
11776 *
11777 * Create a parser context for an XML in-memory document.
11778 *
11779 * Returns the new parser context or NULL
11780 */
11781xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011782xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011783 xmlParserCtxtPtr ctxt;
11784 xmlParserInputPtr input;
11785 xmlParserInputBufferPtr buf;
11786
11787 if (buffer == NULL)
11788 return(NULL);
11789 if (size <= 0)
11790 return(NULL);
11791
11792 ctxt = xmlNewParserCtxt();
11793 if (ctxt == NULL)
11794 return(NULL);
11795
Daniel Veillard53350552003-09-18 13:35:51 +000011796 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000011797 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011798 if (buf == NULL) {
11799 xmlFreeParserCtxt(ctxt);
11800 return(NULL);
11801 }
Owen Taylor3473f882001-02-23 17:55:21 +000011802
11803 input = xmlNewInputStream(ctxt);
11804 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000011805 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011806 xmlFreeParserCtxt(ctxt);
11807 return(NULL);
11808 }
11809
11810 input->filename = NULL;
11811 input->buf = buf;
11812 input->base = input->buf->buffer->content;
11813 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011814 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011815
11816 inputPush(ctxt, input);
11817 return(ctxt);
11818}
11819
Daniel Veillard81273902003-09-30 00:43:48 +000011820#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011821/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011822 * xmlSAXParseMemoryWithData:
11823 * @sax: the SAX handler block
11824 * @buffer: an pointer to a char array
11825 * @size: the size of the array
11826 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11827 * documents
11828 * @data: the userdata
11829 *
11830 * parse an XML in-memory block and use the given SAX function block
11831 * to handle the parsing callback. If sax is NULL, fallback to the default
11832 * DOM tree building routines.
11833 *
11834 * User data (void *) is stored within the parser context in the
11835 * context's _private member, so it is available nearly everywhere in libxml
11836 *
11837 * Returns the resulting document tree
11838 */
11839
11840xmlDocPtr
11841xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
11842 int size, int recovery, void *data) {
11843 xmlDocPtr ret;
11844 xmlParserCtxtPtr ctxt;
11845
11846 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11847 if (ctxt == NULL) return(NULL);
11848 if (sax != NULL) {
11849 if (ctxt->sax != NULL)
11850 xmlFree(ctxt->sax);
11851 ctxt->sax = sax;
11852 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011853 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011854 if (data!=NULL) {
11855 ctxt->_private=data;
11856 }
11857
Daniel Veillardadba5f12003-04-04 16:09:01 +000011858 ctxt->recovery = recovery;
11859
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011860 xmlParseDocument(ctxt);
11861
11862 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
11863 else {
11864 ret = NULL;
11865 xmlFreeDoc(ctxt->myDoc);
11866 ctxt->myDoc = NULL;
11867 }
11868 if (sax != NULL)
11869 ctxt->sax = NULL;
11870 xmlFreeParserCtxt(ctxt);
11871
11872 return(ret);
11873}
11874
11875/**
Owen Taylor3473f882001-02-23 17:55:21 +000011876 * xmlSAXParseMemory:
11877 * @sax: the SAX handler block
11878 * @buffer: an pointer to a char array
11879 * @size: the size of the array
11880 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
11881 * documents
11882 *
11883 * parse an XML in-memory block and use the given SAX function block
11884 * to handle the parsing callback. If sax is NULL, fallback to the default
11885 * DOM tree building routines.
11886 *
11887 * Returns the resulting document tree
11888 */
11889xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000011890xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
11891 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000011892 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011893}
11894
11895/**
11896 * xmlParseMemory:
11897 * @buffer: an pointer to a char array
11898 * @size: the size of the array
11899 *
11900 * parse an XML in-memory block and build a tree.
11901 *
11902 * Returns the resulting document tree
11903 */
11904
Daniel Veillard50822cb2001-07-26 20:05:51 +000011905xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011906 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11907}
11908
11909/**
11910 * xmlRecoverMemory:
11911 * @buffer: an pointer to a char array
11912 * @size: the size of the array
11913 *
11914 * parse an XML in-memory block and build a tree.
11915 * In the case the document is not Well Formed, a tree is built anyway
11916 *
11917 * Returns the resulting document tree
11918 */
11919
Daniel Veillard50822cb2001-07-26 20:05:51 +000011920xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011921 return(xmlSAXParseMemory(NULL, buffer, size, 1));
11922}
11923
11924/**
11925 * xmlSAXUserParseMemory:
11926 * @sax: a SAX handler
11927 * @user_data: The user data returned on SAX callbacks
11928 * @buffer: an in-memory XML document input
11929 * @size: the length of the XML document in bytes
11930 *
11931 * A better SAX parsing routine.
11932 * parse an XML in-memory buffer and call the given SAX handler routines.
11933 *
11934 * Returns 0 in case of success or a error number otherwise
11935 */
11936int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011937 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011938 int ret = 0;
11939 xmlParserCtxtPtr ctxt;
11940 xmlSAXHandlerPtr oldsax = NULL;
11941
Daniel Veillard9e923512002-08-14 08:48:52 +000011942 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000011943 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11944 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000011945 oldsax = ctxt->sax;
11946 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011947 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000011948 if (user_data != NULL)
11949 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000011950
11951 xmlParseDocument(ctxt);
11952
11953 if (ctxt->wellFormed)
11954 ret = 0;
11955 else {
11956 if (ctxt->errNo != 0)
11957 ret = ctxt->errNo;
11958 else
11959 ret = -1;
11960 }
Daniel Veillard9e923512002-08-14 08:48:52 +000011961 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000011962 if (ctxt->myDoc != NULL) {
11963 xmlFreeDoc(ctxt->myDoc);
11964 ctxt->myDoc = NULL;
11965 }
Owen Taylor3473f882001-02-23 17:55:21 +000011966 xmlFreeParserCtxt(ctxt);
11967
11968 return ret;
11969}
Daniel Veillard81273902003-09-30 00:43:48 +000011970#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011971
11972/**
11973 * xmlCreateDocParserCtxt:
11974 * @cur: a pointer to an array of xmlChar
11975 *
11976 * Creates a parser context for an XML in-memory document.
11977 *
11978 * Returns the new parser context or NULL
11979 */
11980xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011981xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000011982 int len;
11983
11984 if (cur == NULL)
11985 return(NULL);
11986 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000011987 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000011988}
11989
Daniel Veillard81273902003-09-30 00:43:48 +000011990#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011991/**
11992 * xmlSAXParseDoc:
11993 * @sax: the SAX handler block
11994 * @cur: a pointer to an array of xmlChar
11995 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11996 * documents
11997 *
11998 * parse an XML in-memory document and build a tree.
11999 * It use the given SAX function block to handle the parsing callback.
12000 * If sax is NULL, fallback to the default DOM tree building routines.
12001 *
12002 * Returns the resulting document tree
12003 */
12004
12005xmlDocPtr
12006xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
12007 xmlDocPtr ret;
12008 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012009 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012010
Daniel Veillard38936062004-11-04 17:45:11 +000012011 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012012
12013
12014 ctxt = xmlCreateDocParserCtxt(cur);
12015 if (ctxt == NULL) return(NULL);
12016 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012017 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012018 ctxt->sax = sax;
12019 ctxt->userData = NULL;
12020 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012021 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012022
12023 xmlParseDocument(ctxt);
12024 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12025 else {
12026 ret = NULL;
12027 xmlFreeDoc(ctxt->myDoc);
12028 ctxt->myDoc = NULL;
12029 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012030 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012031 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012032 xmlFreeParserCtxt(ctxt);
12033
12034 return(ret);
12035}
12036
12037/**
12038 * xmlParseDoc:
12039 * @cur: a pointer to an array of xmlChar
12040 *
12041 * parse an XML in-memory document and build a tree.
12042 *
12043 * Returns the resulting document tree
12044 */
12045
12046xmlDocPtr
12047xmlParseDoc(xmlChar *cur) {
12048 return(xmlSAXParseDoc(NULL, cur, 0));
12049}
Daniel Veillard81273902003-09-30 00:43:48 +000012050#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012051
Daniel Veillard81273902003-09-30 00:43:48 +000012052#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012053/************************************************************************
12054 * *
12055 * Specific function to keep track of entities references *
12056 * and used by the XSLT debugger *
12057 * *
12058 ************************************************************************/
12059
12060static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12061
12062/**
12063 * xmlAddEntityReference:
12064 * @ent : A valid entity
12065 * @firstNode : A valid first node for children of entity
12066 * @lastNode : A valid last node of children entity
12067 *
12068 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12069 */
12070static void
12071xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12072 xmlNodePtr lastNode)
12073{
12074 if (xmlEntityRefFunc != NULL) {
12075 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12076 }
12077}
12078
12079
12080/**
12081 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012082 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012083 *
12084 * Set the function to call call back when a xml reference has been made
12085 */
12086void
12087xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12088{
12089 xmlEntityRefFunc = func;
12090}
Daniel Veillard81273902003-09-30 00:43:48 +000012091#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012092
12093/************************************************************************
12094 * *
12095 * Miscellaneous *
12096 * *
12097 ************************************************************************/
12098
12099#ifdef LIBXML_XPATH_ENABLED
12100#include <libxml/xpath.h>
12101#endif
12102
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012103extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012104static int xmlParserInitialized = 0;
12105
12106/**
12107 * xmlInitParser:
12108 *
12109 * Initialization function for the XML parser.
12110 * This is not reentrant. Call once before processing in case of
12111 * use in multithreaded programs.
12112 */
12113
12114void
12115xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012116 if (xmlParserInitialized != 0)
12117 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012118
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012119 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12120 (xmlGenericError == NULL))
12121 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012122 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012123 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012124 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012125 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012126 xmlDefaultSAXHandlerInit();
12127 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012128#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012129 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012130#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012131#ifdef LIBXML_HTML_ENABLED
12132 htmlInitAutoClose();
12133 htmlDefaultSAXHandlerInit();
12134#endif
12135#ifdef LIBXML_XPATH_ENABLED
12136 xmlXPathInit();
12137#endif
12138 xmlParserInitialized = 1;
12139}
12140
12141/**
12142 * xmlCleanupParser:
12143 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012144 * Cleanup function for the XML library. It tries to reclaim all
12145 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012146 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012147 * function should not prevent reusing the library but one should
12148 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012149 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012150 */
12151
12152void
12153xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012154 if (!xmlParserInitialized)
12155 return;
12156
Owen Taylor3473f882001-02-23 17:55:21 +000012157 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012158#ifdef LIBXML_CATALOG_ENABLED
12159 xmlCatalogCleanup();
12160#endif
Daniel Veillard04054be2003-10-15 10:48:54 +000012161 xmlCleanupInputCallbacks();
12162#ifdef LIBXML_OUTPUT_ENABLED
12163 xmlCleanupOutputCallbacks();
12164#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012165#ifdef LIBXML_SCHEMAS_ENABLED
12166 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012167 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012168#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012169 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012170 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012171 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012172 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012173 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012174}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012175
/************************************************************************
 *                                                                      *
 *  New set (2.6.0) of simpler and more flexible APIs                   *
 *                                                                      *
 ************************************************************************/
12181
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible at the point
 * of use; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe inside if/else) and must be followed by ';'.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
12193
12194/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012195 * xmlCtxtReset:
12196 * @ctxt: an XML parser context
12197 *
12198 * Reset a parser context
12199 */
12200void
12201xmlCtxtReset(xmlParserCtxtPtr ctxt)
12202{
12203 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012204 xmlDictPtr dict;
12205
12206 if (ctxt == NULL)
12207 return;
12208
12209 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012210
12211 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12212 xmlFreeInputStream(input);
12213 }
12214 ctxt->inputNr = 0;
12215 ctxt->input = NULL;
12216
12217 ctxt->spaceNr = 0;
12218 ctxt->spaceTab[0] = -1;
12219 ctxt->space = &ctxt->spaceTab[0];
12220
12221
12222 ctxt->nodeNr = 0;
12223 ctxt->node = NULL;
12224
12225 ctxt->nameNr = 0;
12226 ctxt->name = NULL;
12227
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012228 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012229 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012230 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012231 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012232 DICT_FREE(ctxt->directory);
12233 ctxt->directory = NULL;
12234 DICT_FREE(ctxt->extSubURI);
12235 ctxt->extSubURI = NULL;
12236 DICT_FREE(ctxt->extSubSystem);
12237 ctxt->extSubSystem = NULL;
12238 if (ctxt->myDoc != NULL)
12239 xmlFreeDoc(ctxt->myDoc);
12240 ctxt->myDoc = NULL;
12241
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012242 ctxt->standalone = -1;
12243 ctxt->hasExternalSubset = 0;
12244 ctxt->hasPErefs = 0;
12245 ctxt->html = 0;
12246 ctxt->external = 0;
12247 ctxt->instate = XML_PARSER_START;
12248 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012249
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012250 ctxt->wellFormed = 1;
12251 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012252 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012253 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012254#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012255 ctxt->vctxt.userData = ctxt;
12256 ctxt->vctxt.error = xmlParserValidityError;
12257 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012258#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012259 ctxt->record_info = 0;
12260 ctxt->nbChars = 0;
12261 ctxt->checkIndex = 0;
12262 ctxt->inSubset = 0;
12263 ctxt->errNo = XML_ERR_OK;
12264 ctxt->depth = 0;
12265 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12266 ctxt->catalogs = NULL;
12267 xmlInitNodeInfoSeq(&ctxt->node_seq);
12268
12269 if (ctxt->attsDefault != NULL) {
12270 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12271 ctxt->attsDefault = NULL;
12272 }
12273 if (ctxt->attsSpecial != NULL) {
12274 xmlHashFree(ctxt->attsSpecial, NULL);
12275 ctxt->attsSpecial = NULL;
12276 }
12277
Daniel Veillard4432df22003-09-28 18:58:27 +000012278#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012279 if (ctxt->catalogs != NULL)
12280 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012281#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012282 if (ctxt->lastError.code != XML_ERR_OK)
12283 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012284}
12285
12286/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012287 * xmlCtxtResetPush:
12288 * @ctxt: an XML parser context
12289 * @chunk: a pointer to an array of chars
12290 * @size: number of chars in the array
12291 * @filename: an optional file name or URI
12292 * @encoding: the document encoding, or NULL
12293 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012294 * Reset a push parser context
12295 *
12296 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012297 */
12298int
12299xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12300 int size, const char *filename, const char *encoding)
12301{
12302 xmlParserInputPtr inputStream;
12303 xmlParserInputBufferPtr buf;
12304 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12305
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012306 if (ctxt == NULL)
12307 return(1);
12308
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012309 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12310 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12311
12312 buf = xmlAllocParserInputBuffer(enc);
12313 if (buf == NULL)
12314 return(1);
12315
12316 if (ctxt == NULL) {
12317 xmlFreeParserInputBuffer(buf);
12318 return(1);
12319 }
12320
12321 xmlCtxtReset(ctxt);
12322
12323 if (ctxt->pushTab == NULL) {
12324 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12325 sizeof(xmlChar *));
12326 if (ctxt->pushTab == NULL) {
12327 xmlErrMemory(ctxt, NULL);
12328 xmlFreeParserInputBuffer(buf);
12329 return(1);
12330 }
12331 }
12332
12333 if (filename == NULL) {
12334 ctxt->directory = NULL;
12335 } else {
12336 ctxt->directory = xmlParserGetDirectory(filename);
12337 }
12338
12339 inputStream = xmlNewInputStream(ctxt);
12340 if (inputStream == NULL) {
12341 xmlFreeParserInputBuffer(buf);
12342 return(1);
12343 }
12344
12345 if (filename == NULL)
12346 inputStream->filename = NULL;
12347 else
12348 inputStream->filename = (char *)
12349 xmlCanonicPath((const xmlChar *) filename);
12350 inputStream->buf = buf;
12351 inputStream->base = inputStream->buf->buffer->content;
12352 inputStream->cur = inputStream->buf->buffer->content;
12353 inputStream->end =
12354 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12355
12356 inputPush(ctxt, inputStream);
12357
12358 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12359 (ctxt->input->buf != NULL)) {
12360 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12361 int cur = ctxt->input->cur - ctxt->input->base;
12362
12363 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12364
12365 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12366 ctxt->input->cur = ctxt->input->base + cur;
12367 ctxt->input->end =
12368 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12369 use];
12370#ifdef DEBUG_PUSH
12371 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12372#endif
12373 }
12374
12375 if (encoding != NULL) {
12376 xmlCharEncodingHandlerPtr hdlr;
12377
12378 hdlr = xmlFindCharEncodingHandler(encoding);
12379 if (hdlr != NULL) {
12380 xmlSwitchToEncoding(ctxt, hdlr);
12381 } else {
12382 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12383 "Unsupported encoding %s\n", BAD_CAST encoding);
12384 }
12385 } else if (enc != XML_CHAR_ENCODING_NONE) {
12386 xmlSwitchEncoding(ctxt, enc);
12387 }
12388
12389 return(0);
12390}
12391
12392/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012393 * xmlCtxtUseOptions:
12394 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012395 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012396 *
12397 * Applies the options to the parser context
12398 *
12399 * Returns 0 in case of success, the set of unknown or unimplemented options
12400 * in case of error.
12401 */
12402int
12403xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12404{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012405 if (ctxt == NULL)
12406 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012407 if (options & XML_PARSE_RECOVER) {
12408 ctxt->recovery = 1;
12409 options -= XML_PARSE_RECOVER;
12410 } else
12411 ctxt->recovery = 0;
12412 if (options & XML_PARSE_DTDLOAD) {
12413 ctxt->loadsubset = XML_DETECT_IDS;
12414 options -= XML_PARSE_DTDLOAD;
12415 } else
12416 ctxt->loadsubset = 0;
12417 if (options & XML_PARSE_DTDATTR) {
12418 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12419 options -= XML_PARSE_DTDATTR;
12420 }
12421 if (options & XML_PARSE_NOENT) {
12422 ctxt->replaceEntities = 1;
12423 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12424 options -= XML_PARSE_NOENT;
12425 } else
12426 ctxt->replaceEntities = 0;
12427 if (options & XML_PARSE_NOWARNING) {
12428 ctxt->sax->warning = NULL;
12429 options -= XML_PARSE_NOWARNING;
12430 }
12431 if (options & XML_PARSE_NOERROR) {
12432 ctxt->sax->error = NULL;
12433 ctxt->sax->fatalError = NULL;
12434 options -= XML_PARSE_NOERROR;
12435 }
12436 if (options & XML_PARSE_PEDANTIC) {
12437 ctxt->pedantic = 1;
12438 options -= XML_PARSE_PEDANTIC;
12439 } else
12440 ctxt->pedantic = 0;
12441 if (options & XML_PARSE_NOBLANKS) {
12442 ctxt->keepBlanks = 0;
12443 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12444 options -= XML_PARSE_NOBLANKS;
12445 } else
12446 ctxt->keepBlanks = 1;
12447 if (options & XML_PARSE_DTDVALID) {
12448 ctxt->validate = 1;
12449 if (options & XML_PARSE_NOWARNING)
12450 ctxt->vctxt.warning = NULL;
12451 if (options & XML_PARSE_NOERROR)
12452 ctxt->vctxt.error = NULL;
12453 options -= XML_PARSE_DTDVALID;
12454 } else
12455 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012456#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012457 if (options & XML_PARSE_SAX1) {
12458 ctxt->sax->startElement = xmlSAX2StartElement;
12459 ctxt->sax->endElement = xmlSAX2EndElement;
12460 ctxt->sax->startElementNs = NULL;
12461 ctxt->sax->endElementNs = NULL;
12462 ctxt->sax->initialized = 1;
12463 options -= XML_PARSE_SAX1;
12464 }
Daniel Veillard81273902003-09-30 00:43:48 +000012465#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012466 if (options & XML_PARSE_NODICT) {
12467 ctxt->dictNames = 0;
12468 options -= XML_PARSE_NODICT;
12469 } else {
12470 ctxt->dictNames = 1;
12471 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012472 if (options & XML_PARSE_NOCDATA) {
12473 ctxt->sax->cdataBlock = NULL;
12474 options -= XML_PARSE_NOCDATA;
12475 }
12476 if (options & XML_PARSE_NSCLEAN) {
12477 ctxt->options |= XML_PARSE_NSCLEAN;
12478 options -= XML_PARSE_NSCLEAN;
12479 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012480 if (options & XML_PARSE_NONET) {
12481 ctxt->options |= XML_PARSE_NONET;
12482 options -= XML_PARSE_NONET;
12483 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012484 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012485 return (options);
12486}
12487
12488/**
12489 * xmlDoRead:
12490 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012491 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012492 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012493 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012494 * @reuse: keep the context for reuse
12495 *
12496 * Common front-end for the xmlRead functions
12497 *
12498 * Returns the resulting document tree or NULL
12499 */
12500static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012501xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12502 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012503{
12504 xmlDocPtr ret;
12505
12506 xmlCtxtUseOptions(ctxt, options);
12507 if (encoding != NULL) {
12508 xmlCharEncodingHandlerPtr hdlr;
12509
12510 hdlr = xmlFindCharEncodingHandler(encoding);
12511 if (hdlr != NULL)
12512 xmlSwitchToEncoding(ctxt, hdlr);
12513 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012514 if ((URL != NULL) && (ctxt->input != NULL) &&
12515 (ctxt->input->filename == NULL))
12516 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012517 xmlParseDocument(ctxt);
12518 if ((ctxt->wellFormed) || ctxt->recovery)
12519 ret = ctxt->myDoc;
12520 else {
12521 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012522 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012523 xmlFreeDoc(ctxt->myDoc);
12524 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012525 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012526 ctxt->myDoc = NULL;
12527 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012528 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012529 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012530
12531 return (ret);
12532}
12533
12534/**
12535 * xmlReadDoc:
12536 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012537 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012538 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012539 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012540 *
12541 * parse an XML in-memory document and build a tree.
12542 *
12543 * Returns the resulting document tree
12544 */
12545xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012546xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012547{
12548 xmlParserCtxtPtr ctxt;
12549
12550 if (cur == NULL)
12551 return (NULL);
12552
12553 ctxt = xmlCreateDocParserCtxt(cur);
12554 if (ctxt == NULL)
12555 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012556 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012557}
12558
12559/**
12560 * xmlReadFile:
12561 * @filename: a file or URL
12562 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012563 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012564 *
12565 * parse an XML file from the filesystem or the network.
12566 *
12567 * Returns the resulting document tree
12568 */
12569xmlDocPtr
12570xmlReadFile(const char *filename, const char *encoding, int options)
12571{
12572 xmlParserCtxtPtr ctxt;
12573
Daniel Veillard61b93382003-11-03 14:28:31 +000012574 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012575 if (ctxt == NULL)
12576 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012577 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012578}
12579
12580/**
12581 * xmlReadMemory:
12582 * @buffer: a pointer to a char array
12583 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012584 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012585 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012586 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012587 *
12588 * parse an XML in-memory document and build a tree.
12589 *
12590 * Returns the resulting document tree
12591 */
12592xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012593xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012594{
12595 xmlParserCtxtPtr ctxt;
12596
12597 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12598 if (ctxt == NULL)
12599 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012600 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012601}
12602
12603/**
12604 * xmlReadFd:
12605 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012606 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012607 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012608 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012609 *
12610 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012611 * NOTE that the file descriptor will not be closed when the
12612 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012613 *
12614 * Returns the resulting document tree
12615 */
12616xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012617xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012618{
12619 xmlParserCtxtPtr ctxt;
12620 xmlParserInputBufferPtr input;
12621 xmlParserInputPtr stream;
12622
12623 if (fd < 0)
12624 return (NULL);
12625
12626 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12627 if (input == NULL)
12628 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012629 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012630 ctxt = xmlNewParserCtxt();
12631 if (ctxt == NULL) {
12632 xmlFreeParserInputBuffer(input);
12633 return (NULL);
12634 }
12635 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12636 if (stream == NULL) {
12637 xmlFreeParserInputBuffer(input);
12638 xmlFreeParserCtxt(ctxt);
12639 return (NULL);
12640 }
12641 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012642 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012643}
12644
12645/**
12646 * xmlReadIO:
12647 * @ioread: an I/O read function
12648 * @ioclose: an I/O close function
12649 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012650 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012651 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012652 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012653 *
12654 * parse an XML document from I/O functions and source and build a tree.
12655 *
12656 * Returns the resulting document tree
12657 */
12658xmlDocPtr
12659xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012660 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012661{
12662 xmlParserCtxtPtr ctxt;
12663 xmlParserInputBufferPtr input;
12664 xmlParserInputPtr stream;
12665
12666 if (ioread == NULL)
12667 return (NULL);
12668
12669 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12670 XML_CHAR_ENCODING_NONE);
12671 if (input == NULL)
12672 return (NULL);
12673 ctxt = xmlNewParserCtxt();
12674 if (ctxt == NULL) {
12675 xmlFreeParserInputBuffer(input);
12676 return (NULL);
12677 }
12678 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12679 if (stream == NULL) {
12680 xmlFreeParserInputBuffer(input);
12681 xmlFreeParserCtxt(ctxt);
12682 return (NULL);
12683 }
12684 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012685 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012686}
12687
12688/**
12689 * xmlCtxtReadDoc:
12690 * @ctxt: an XML parser context
12691 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012692 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012693 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012694 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012695 *
12696 * parse an XML in-memory document and build a tree.
12697 * This reuses the existing @ctxt parser context
12698 *
12699 * Returns the resulting document tree
12700 */
12701xmlDocPtr
12702xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012703 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012704{
12705 xmlParserInputPtr stream;
12706
12707 if (cur == NULL)
12708 return (NULL);
12709 if (ctxt == NULL)
12710 return (NULL);
12711
12712 xmlCtxtReset(ctxt);
12713
12714 stream = xmlNewStringInputStream(ctxt, cur);
12715 if (stream == NULL) {
12716 return (NULL);
12717 }
12718 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012719 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012720}
12721
12722/**
12723 * xmlCtxtReadFile:
12724 * @ctxt: an XML parser context
12725 * @filename: a file or URL
12726 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012727 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012728 *
12729 * parse an XML file from the filesystem or the network.
12730 * This reuses the existing @ctxt parser context
12731 *
12732 * Returns the resulting document tree
12733 */
12734xmlDocPtr
12735xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12736 const char *encoding, int options)
12737{
12738 xmlParserInputPtr stream;
12739
12740 if (filename == NULL)
12741 return (NULL);
12742 if (ctxt == NULL)
12743 return (NULL);
12744
12745 xmlCtxtReset(ctxt);
12746
Daniel Veillard29614c72004-11-26 10:47:26 +000012747 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012748 if (stream == NULL) {
12749 return (NULL);
12750 }
12751 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012752 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012753}
12754
12755/**
12756 * xmlCtxtReadMemory:
12757 * @ctxt: an XML parser context
12758 * @buffer: a pointer to a char array
12759 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012760 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012761 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012762 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012763 *
12764 * parse an XML in-memory document and build a tree.
12765 * This reuses the existing @ctxt parser context
12766 *
12767 * Returns the resulting document tree
12768 */
12769xmlDocPtr
12770xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012771 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012772{
12773 xmlParserInputBufferPtr input;
12774 xmlParserInputPtr stream;
12775
12776 if (ctxt == NULL)
12777 return (NULL);
12778 if (buffer == NULL)
12779 return (NULL);
12780
12781 xmlCtxtReset(ctxt);
12782
12783 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12784 if (input == NULL) {
12785 return(NULL);
12786 }
12787
12788 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12789 if (stream == NULL) {
12790 xmlFreeParserInputBuffer(input);
12791 return(NULL);
12792 }
12793
12794 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012795 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012796}
12797
12798/**
12799 * xmlCtxtReadFd:
12800 * @ctxt: an XML parser context
12801 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012802 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012803 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012804 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012805 *
12806 * parse an XML from a file descriptor and build a tree.
12807 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012808 * NOTE that the file descriptor will not be closed when the
12809 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012810 *
12811 * Returns the resulting document tree
12812 */
12813xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012814xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
12815 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012816{
12817 xmlParserInputBufferPtr input;
12818 xmlParserInputPtr stream;
12819
12820 if (fd < 0)
12821 return (NULL);
12822 if (ctxt == NULL)
12823 return (NULL);
12824
12825 xmlCtxtReset(ctxt);
12826
12827
12828 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12829 if (input == NULL)
12830 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012831 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012832 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12833 if (stream == NULL) {
12834 xmlFreeParserInputBuffer(input);
12835 return (NULL);
12836 }
12837 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012838 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012839}
12840
12841/**
12842 * xmlCtxtReadIO:
12843 * @ctxt: an XML parser context
12844 * @ioread: an I/O read function
12845 * @ioclose: an I/O close function
12846 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012847 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012848 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012849 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012850 *
12851 * parse an XML document from I/O functions and source and build a tree.
12852 * This reuses the existing @ctxt parser context
12853 *
12854 * Returns the resulting document tree
12855 */
12856xmlDocPtr
12857xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
12858 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000012859 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012860 const char *encoding, int options)
12861{
12862 xmlParserInputBufferPtr input;
12863 xmlParserInputPtr stream;
12864
12865 if (ioread == NULL)
12866 return (NULL);
12867 if (ctxt == NULL)
12868 return (NULL);
12869
12870 xmlCtxtReset(ctxt);
12871
12872 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12873 XML_CHAR_ENCODING_NONE);
12874 if (input == NULL)
12875 return (NULL);
12876 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12877 if (stream == NULL) {
12878 xmlFreeParserInputBuffer(input);
12879 return (NULL);
12880 }
12881 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012882 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012883}