blob: d08988299c69d9be63ffcdeb9670af4a6ecb50a3 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with the related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature.
 */
unsigned int xmlParserMaxDepth = 1024;

#define SAX2 1

#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * table slots are read-only, hence the double const qualification.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
Daniel Veillarda07050d2003-10-19 14:46:32 +0000108
Owen Taylor3473f882001-02-23 17:55:21 +0000109/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
Daniel Veillard7d515752003-09-26 19:12:37 +0000113static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000114xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
115 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000116 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000117 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000118
Daniel Veillard81273902003-09-30 00:43:48 +0000119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000120static void
121xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
122 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000124
Daniel Veillard7d515752003-09-26 19:12:37 +0000125static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000126xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
127 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000128
/************************************************************************
 *									*
 * 		Some factorized error routines				*
 *									*
 ************************************************************************/
134
135/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000136 * xmlErrAttributeDup:
137 * @ctxt: an XML parser context
138 * @prefix: the attribute prefix
139 * @localname: the attribute localname
140 *
141 * Handle a redefinition of attribute error
142 */
143static void
144xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
145 const xmlChar * localname)
146{
Daniel Veillard157fee02003-10-31 10:36:03 +0000147 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
148 (ctxt->instate == XML_PARSER_EOF))
149 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000150 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000151 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000152 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000153 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
154 (const char *) localname, NULL, NULL, 0, 0,
155 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000156 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000157 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000158 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
159 (const char *) prefix, (const char *) localname,
160 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
161 localname);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000162 ctxt->wellFormed = 0;
163 if (ctxt->recovery == 0)
164 ctxt->disableSAX = 1;
165}
166
167/**
168 * xmlFatalErr:
169 * @ctxt: an XML parser context
170 * @error: the error number
171 * @extra: extra information string
172 *
173 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
174 */
175static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000176xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000177{
178 const char *errmsg;
179
Daniel Veillard157fee02003-10-31 10:36:03 +0000180 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
181 (ctxt->instate == XML_PARSER_EOF))
182 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000183 switch (error) {
184 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000185 errmsg = "CharRef: invalid hexadecimal value\n";
186 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000188 errmsg = "CharRef: invalid decimal value\n";
189 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000190 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000191 errmsg = "CharRef: invalid value\n";
192 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000194 errmsg = "internal error";
195 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000196 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000197 errmsg = "PEReference at end of document\n";
198 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000199 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000200 errmsg = "PEReference in prolog\n";
201 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000202 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000203 errmsg = "PEReference in epilog\n";
204 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000205 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000206 errmsg = "PEReference: no name\n";
207 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000208 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000209 errmsg = "PEReference: expecting ';'\n";
210 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000211 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000212 errmsg = "Detected an entity reference loop\n";
213 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000214 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000215 errmsg = "EntityValue: \" or ' expected\n";
216 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000217 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000218 errmsg = "PEReferences forbidden in internal subset\n";
219 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000220 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000221 errmsg = "EntityValue: \" or ' expected\n";
222 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000223 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000224 errmsg = "AttValue: \" or ' expected\n";
225 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000226 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000227 errmsg = "Unescaped '<' not allowed in attributes values\n";
228 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000229 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000230 errmsg = "SystemLiteral \" or ' expected\n";
231 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000233 errmsg = "Unfinished System or Public ID \" or ' expected\n";
234 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000235 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000236 errmsg = "Sequence ']]>' not allowed in content\n";
237 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000238 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000239 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
240 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000241 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000242 errmsg = "PUBLIC, the Public Identifier is missing\n";
243 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000244 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000245 errmsg = "Comment must not contain '--' (double-hyphen)\n";
246 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000247 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000248 errmsg = "xmlParsePI : no target name\n";
249 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000250 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000251 errmsg = "Invalid PI name\n";
252 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000253 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000254 errmsg = "NOTATION: Name expected here\n";
255 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000257 errmsg = "'>' required to close NOTATION declaration\n";
258 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000259 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 errmsg = "Entity value required\n";
261 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000262 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000263 errmsg = "Fragment not allowed";
264 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000265 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000266 errmsg = "'(' required to start ATTLIST enumeration\n";
267 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000269 errmsg = "NmToken expected in ATTLIST enumeration\n";
270 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000271 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000272 errmsg = "')' required to finish ATTLIST enumeration\n";
273 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000274 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000275 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
276 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000277 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000278 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
279 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000280 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 errmsg = "ContentDecl : Name or '(' expected\n";
282 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000283 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000284 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
285 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000286 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000287 errmsg =
288 "PEReference: forbidden within markup decl in internal subset\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "expected '>'\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "XML conditional section '[' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg = "Content error in the external subset\n";
298 break;
299 case XML_ERR_CONDSEC_INVALID_KEYWORD:
300 errmsg =
301 "conditional section INCLUDE or IGNORE keyword expected\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section not closed\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Text declaration '<?xml' required\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "parsing XML declaration: '?>' expected\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "external parsed entities cannot be standalone\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "EntityRef: expecting ';'\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "DOCTYPE improperly terminated\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "EndTag: '</' not found\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "expected '='\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "String not closed expecting \" or '\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "String not started expecting ' or \"\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "Invalid XML encoding name\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "standalone accepts only 'yes' or 'no'\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Document is empty\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Extra content at the end of the document\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "chunk is not well balanced\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "extra content at the end of well balanced chunk\n";
350 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000351 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "Malformed declaration expecting version\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 case:
356 errmsg = "\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 default:
360 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 }
362 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
365 info);
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 ctxt->wellFormed = 0;
367 if (ctxt->recovery == 0)
368 ctxt->disableSAX = 1;
369}
370
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000371/**
372 * xmlFatalErrMsg:
373 * @ctxt: an XML parser context
374 * @error: the error number
375 * @msg: the error message
376 *
377 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378 */
379static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000382{
Daniel Veillard157fee02003-10-31 10:36:03 +0000383 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
384 (ctxt->instate == XML_PARSER_EOF))
385 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000386 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000387 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000389 ctxt->wellFormed = 0;
390 if (ctxt->recovery == 0)
391 ctxt->disableSAX = 1;
392}
393
394/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000395 * xmlWarningMsg:
396 * @ctxt: an XML parser context
397 * @error: the error number
398 * @msg: the error message
399 * @str1: extra data
400 * @str2: extra data
401 *
402 * Handle a warning.
403 */
404static void
405xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
406 const char *msg, const xmlChar *str1, const xmlChar *str2)
407{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000408 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000409
Daniel Veillard157fee02003-10-31 10:36:03 +0000410 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
411 (ctxt->instate == XML_PARSER_EOF))
412 return;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000413 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000414 schannel = ctxt->sax->serror;
415 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000416 (ctxt->sax) ? ctxt->sax->warning : NULL,
417 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000418 ctxt, NULL, XML_FROM_PARSER, error,
419 XML_ERR_WARNING, NULL, 0,
420 (const char *) str1, (const char *) str2, NULL, 0, 0,
421 msg, (const char *) str1, (const char *) str2);
422}
423
424/**
425 * xmlValidityError:
426 * @ctxt: an XML parser context
427 * @error: the error number
428 * @msg: the error message
429 * @str1: extra data
430 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000431 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000432 */
433static void
434xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
435 const char *msg, const xmlChar *str1)
436{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000437 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000438
439 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
440 (ctxt->instate == XML_PARSER_EOF))
441 return;
Daniel Veillard24eb9782003-10-04 21:08:09 +0000442 ctxt->errNo = error;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000443 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000444 schannel = ctxt->sax->serror;
Daniel Veillardc790bf42003-10-11 10:50:10 +0000445 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000446 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000447 ctxt, NULL, XML_FROM_DTD, error,
448 XML_ERR_ERROR, NULL, 0, (const char *) str1,
449 NULL, NULL, 0, 0,
450 msg, (const char *) str1);
451 ctxt->valid = 0;
452}
453
454/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 * xmlFatalErrMsgInt:
456 * @ctxt: an XML parser context
457 * @error: the error number
458 * @msg: the error message
459 * @val: an integer value
460 *
461 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462 */
463static void
464xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000466{
Daniel Veillard157fee02003-10-31 10:36:03 +0000467 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468 (ctxt->instate == XML_PARSER_EOF))
469 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000470 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000471 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
473 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000474 ctxt->wellFormed = 0;
475 if (ctxt->recovery == 0)
476 ctxt->disableSAX = 1;
477}
478
479/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000480 * xmlFatalErrMsgStrIntStr:
481 * @ctxt: an XML parser context
482 * @error: the error number
483 * @msg: the error message
484 * @str1: an string info
485 * @val: an integer value
486 * @str2: an string info
487 *
488 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
489 */
490static void
491xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
492 const char *msg, const xmlChar *str1, int val,
493 const xmlChar *str2)
494{
Daniel Veillard157fee02003-10-31 10:36:03 +0000495 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
496 (ctxt->instate == XML_PARSER_EOF))
497 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000498 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000499 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000500 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
501 NULL, 0, (const char *) str1, (const char *) str2,
502 NULL, val, 0, msg, str1, val, str2);
503 ctxt->wellFormed = 0;
504 if (ctxt->recovery == 0)
505 ctxt->disableSAX = 1;
506}
507
508/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000509 * xmlFatalErrMsgStr:
510 * @ctxt: an XML parser context
511 * @error: the error number
512 * @msg: the error message
513 * @val: a string value
514 *
515 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
516 */
517static void
518xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000520{
Daniel Veillard157fee02003-10-31 10:36:03 +0000521 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
522 (ctxt->instate == XML_PARSER_EOF))
523 return;
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000524 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000525 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 XML_FROM_PARSER, error, XML_ERR_FATAL,
527 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
528 val);
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532}
533
534/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000535 * xmlErrMsgStr:
536 * @ctxt: an XML parser context
537 * @error: the error number
538 * @msg: the error message
539 * @val: a string value
540 *
541 * Handle a non fatal parser error
542 */
543static void
544xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
545 const char *msg, const xmlChar * val)
546{
Daniel Veillard157fee02003-10-31 10:36:03 +0000547 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
548 (ctxt->instate == XML_PARSER_EOF))
549 return;
Daniel Veillardf403d292003-10-05 13:51:35 +0000550 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000552 XML_FROM_PARSER, error, XML_ERR_ERROR,
553 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
554 val);
555}
556
557/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558 * xmlNsErr:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the message
562 * @info1: extra information string
563 * @info2: extra information string
564 *
565 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
566 */
567static void
568xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
569 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000570 const xmlChar * info1, const xmlChar * info2,
571 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000572{
Daniel Veillard157fee02003-10-31 10:36:03 +0000573 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574 (ctxt->instate == XML_PARSER_EOF))
575 return;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000576 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000578 XML_ERR_ERROR, NULL, 0, (const char *) info1,
579 (const char *) info2, (const char *) info3, 0, 0, msg,
580 info1, info2, info3);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581 ctxt->nsWellFormed = 0;
582}
583
/************************************************************************
 *									*
 * 		SAX2 defaulted attributes handling			*
 *									*
 ************************************************************************/
589
590/**
591 * xmlDetectSAX2:
592 * @ctxt: an XML parser context
593 *
594 * Do the SAX2 detection and specific intialization
595 */
596static void
597xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
598 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000599#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000600 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
601 ((ctxt->sax->startElementNs != NULL) ||
602 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000603#else
604 ctxt->sax2 = 1;
605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000606
607 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
608 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
609 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000610 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
611 (ctxt->str_xml_ns == NULL)) {
612 xmlErrMemory(ctxt, NULL);
613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000614}
615
Daniel Veillarde57ec792003-09-10 10:50:59 +0000616typedef struct _xmlDefAttrs xmlDefAttrs;
617typedef xmlDefAttrs *xmlDefAttrsPtr;
618struct _xmlDefAttrs {
619 int nbAttrs; /* number of defaulted attributes on that element */
620 int maxAttrs; /* the size of the array */
621 const xmlChar *values[4]; /* array of localname/prefix/values */
622};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000623
624/**
625 * xmlAddDefAttrs:
626 * @ctxt: an XML parser context
627 * @fullname: the element fullname
628 * @fullattr: the attribute fullname
629 * @value: the attribute value
630 *
631 * Add a defaulted attribute for an element
632 */
633static void
634xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
635 const xmlChar *fullname,
636 const xmlChar *fullattr,
637 const xmlChar *value) {
638 xmlDefAttrsPtr defaults;
639 int len;
640 const xmlChar *name;
641 const xmlChar *prefix;
642
643 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000644 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000645 if (ctxt->attsDefault == NULL)
646 goto mem_error;
647 }
648
649 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +0000650 * split the element name into prefix:localname , the string found
651 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +0000652 */
653 name = xmlSplitQName3(fullname, &len);
654 if (name == NULL) {
655 name = xmlDictLookup(ctxt->dict, fullname, -1);
656 prefix = NULL;
657 } else {
658 name = xmlDictLookup(ctxt->dict, name, -1);
659 prefix = xmlDictLookup(ctxt->dict, fullname, len);
660 }
661
662 /*
663 * make sure there is some storage
664 */
665 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
666 if (defaults == NULL) {
667 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillard079f6a72004-09-23 13:15:03 +0000668 (4 * 4) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000669 if (defaults == NULL)
670 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000671 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000672 defaults->maxAttrs = 4;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000673 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
674 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +0000675 xmlDefAttrsPtr temp;
676
677 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillarde57ec792003-09-10 10:50:59 +0000678 (2 * defaults->maxAttrs * 4) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +0000679 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +0000680 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +0000681 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000682 defaults->maxAttrs *= 2;
683 xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix, defaults, NULL);
684 }
685
686 /*
687 * plit the element name into prefix:localname , the string found
688 * are within the DTD and hen not associated to namespace names.
689 */
690 name = xmlSplitQName3(fullattr, &len);
691 if (name == NULL) {
692 name = xmlDictLookup(ctxt->dict, fullattr, -1);
693 prefix = NULL;
694 } else {
695 name = xmlDictLookup(ctxt->dict, name, -1);
696 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
697 }
698
699 defaults->values[4 * defaults->nbAttrs] = name;
700 defaults->values[4 * defaults->nbAttrs + 1] = prefix;
701 /* intern the string and precompute the end */
702 len = xmlStrlen(value);
703 value = xmlDictLookup(ctxt->dict, value, len);
704 defaults->values[4 * defaults->nbAttrs + 2] = value;
705 defaults->values[4 * defaults->nbAttrs + 3] = value + len;
706 defaults->nbAttrs++;
707
708 return;
709
710mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000711 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000712 return;
713}
714
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000715/**
716 * xmlAddSpecialAttr:
717 * @ctxt: an XML parser context
718 * @fullname: the element fullname
719 * @fullattr: the attribute fullname
720 * @type: the attribute type
721 *
722 * Register that this attribute is not CDATA
723 */
724static void
725xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
726 const xmlChar *fullname,
727 const xmlChar *fullattr,
728 int type)
729{
730 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +0000731 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000732 if (ctxt->attsSpecial == NULL)
733 goto mem_error;
734 }
735
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +0000736 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
737 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000738 return;
739
740mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000741 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +0000742 return;
743}
744
Daniel Veillard4432df22003-09-28 18:58:27 +0000745/**
746 * xmlCheckLanguageID:
747 * @lang: pointer to the string value
748 *
749 * Checks that the value conforms to the LanguageID production:
750 *
751 * NOTE: this is somewhat deprecated, those productions were removed from
752 * the XML Second edition.
753 *
754 * [33] LanguageID ::= Langcode ('-' Subcode)*
755 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
756 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
757 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
758 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
759 * [38] Subcode ::= ([a-z] | [A-Z])+
760 *
761 * Returns 1 if correct 0 otherwise
762 **/
763int
764xmlCheckLanguageID(const xmlChar * lang)
765{
766 const xmlChar *cur = lang;
767
768 if (cur == NULL)
769 return (0);
770 if (((cur[0] == 'i') && (cur[1] == '-')) ||
771 ((cur[0] == 'I') && (cur[1] == '-'))) {
772 /*
773 * IANA code
774 */
775 cur += 2;
776 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
777 ((cur[0] >= 'a') && (cur[0] <= 'z')))
778 cur++;
779 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
780 ((cur[0] == 'X') && (cur[1] == '-'))) {
781 /*
782 * User code
783 */
784 cur += 2;
785 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
786 ((cur[0] >= 'a') && (cur[0] <= 'z')))
787 cur++;
788 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
789 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
790 /*
791 * ISO639
792 */
793 cur++;
794 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
795 ((cur[0] >= 'a') && (cur[0] <= 'z')))
796 cur++;
797 else
798 return (0);
799 } else
800 return (0);
801 while (cur[0] != 0) { /* non input consuming */
802 if (cur[0] != '-')
803 return (0);
804 cur++;
805 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
806 ((cur[0] >= 'a') && (cur[0] <= 'z')))
807 cur++;
808 else
809 return (0);
810 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
811 ((cur[0] >= 'a') && (cur[0] <= 'z')))
812 cur++;
813 }
814 return (1);
815}
816
Owen Taylor3473f882001-02-23 17:55:21 +0000817/************************************************************************
818 * *
819 * Parser stacks related functions and macros *
820 * *
821 ************************************************************************/
822
/*
 * Prototype only: the definition of xmlParseStringEntityRef() is not in
 * this part of the file.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
825
Daniel Veillard0fb18932003-09-07 09:14:37 +0000826#ifdef SAX2
827/**
828 * nsPush:
829 * @ctxt: an XML parser context
830 * @prefix: the namespace prefix or NULL
831 * @URL: the namespace name
832 *
833 * Pushes a new parser namespace on top of the ns stack
834 *
William M. Brack7b9154b2003-09-27 19:23:50 +0000835 * Returns -1 in case of error, -2 if the namespace should be discarded
836 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +0000837 */
838static int
839nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
840{
Daniel Veillarddca8cc72003-09-26 13:53:14 +0000841 if (ctxt->options & XML_PARSE_NSCLEAN) {
842 int i;
843 for (i = 0;i < ctxt->nsNr;i += 2) {
844 if (ctxt->nsTab[i] == prefix) {
845 /* in scope */
846 if (ctxt->nsTab[i + 1] == URL)
847 return(-2);
848 /* out of scope keep it */
849 break;
850 }
851 }
852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +0000853 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
854 ctxt->nsMax = 10;
855 ctxt->nsNr = 0;
856 ctxt->nsTab = (const xmlChar **)
857 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
858 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000859 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000860 ctxt->nsMax = 0;
861 return (-1);
862 }
863 } else if (ctxt->nsNr >= ctxt->nsMax) {
864 ctxt->nsMax *= 2;
865 ctxt->nsTab = (const xmlChar **)
Daniel Veillard5bb9ccd2004-02-09 12:39:02 +0000866 xmlRealloc((char *) ctxt->nsTab,
Daniel Veillard0fb18932003-09-07 09:14:37 +0000867 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
868 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000869 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000870 ctxt->nsMax /= 2;
871 return (-1);
872 }
873 }
874 ctxt->nsTab[ctxt->nsNr++] = prefix;
875 ctxt->nsTab[ctxt->nsNr++] = URL;
876 return (ctxt->nsNr);
877}
878/**
879 * nsPop:
880 * @ctxt: an XML parser context
881 * @nr: the number to pop
882 *
883 * Pops the top @nr parser prefix/namespace from the ns stack
884 *
885 * Returns the number of namespaces removed
886 */
887static int
888nsPop(xmlParserCtxtPtr ctxt, int nr)
889{
890 int i;
891
892 if (ctxt->nsTab == NULL) return(0);
893 if (ctxt->nsNr < nr) {
894 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
895 nr = ctxt->nsNr;
896 }
897 if (ctxt->nsNr <= 0)
898 return (0);
899
900 for (i = 0;i < nr;i++) {
901 ctxt->nsNr--;
902 ctxt->nsTab[ctxt->nsNr] = NULL;
903 }
904 return(nr);
905}
906#endif
907
908static int
909xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
910 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000911 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000912 int maxatts;
913
914 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +0000915 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +0000916 atts = (const xmlChar **)
917 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000918 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000919 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000920 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
921 if (attallocs == NULL) goto mem_error;
922 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000923 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000924 } else if (nr + 5 > ctxt->maxatts) {
925 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000926 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
927 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +0000928 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000929 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +0000930 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
931 (maxatts / 5) * sizeof(int));
932 if (attallocs == NULL) goto mem_error;
933 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +0000934 ctxt->maxatts = maxatts;
935 }
936 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000938 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000939 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +0000940}
941
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000942/**
943 * inputPush:
944 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000945 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000946 *
947 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000948 *
949 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000950 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000951int
Daniel Veillard1c732d22002-11-30 11:22:59 +0000952inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
953{
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000954 if ((ctxt == NULL) || (value == NULL))
955 return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000956 if (ctxt->inputNr >= ctxt->inputMax) {
957 ctxt->inputMax *= 2;
958 ctxt->inputTab =
959 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
960 ctxt->inputMax *
961 sizeof(ctxt->inputTab[0]));
962 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000963 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000964 return (0);
965 }
966 }
967 ctxt->inputTab[ctxt->inputNr] = value;
968 ctxt->input = value;
969 return (ctxt->inputNr++);
970}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000971/**
Daniel Veillard1c732d22002-11-30 11:22:59 +0000972 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000973 * @ctxt: an XML parser context
974 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000975 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000976 *
Daniel Veillard1c732d22002-11-30 11:22:59 +0000977 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000978 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +0000979xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +0000980inputPop(xmlParserCtxtPtr ctxt)
981{
982 xmlParserInputPtr ret;
983
Daniel Veillard36e5cd52004-11-02 14:52:23 +0000984 if (ctxt == NULL)
985 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +0000986 if (ctxt->inputNr <= 0)
987 return (0);
988 ctxt->inputNr--;
989 if (ctxt->inputNr > 0)
990 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
991 else
992 ctxt->input = NULL;
993 ret = ctxt->inputTab[ctxt->inputNr];
994 ctxt->inputTab[ctxt->inputNr] = 0;
995 return (ret);
996}
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000997/**
998 * nodePush:
999 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001000 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001001 *
1002 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001003 *
1004 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001005 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001006int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001007nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1008{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001009 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001010 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001011 xmlNodePtr *tmp;
1012
1013 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1014 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001015 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001017 xmlErrMemory(ctxt, NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001018 return (0);
1019 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001020 ctxt->nodeTab = tmp;
1021 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001022 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001023 if (((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001024 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001025 "Excessive depth in document: change xmlParserMaxDepth = %d\n",
1026 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001027 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001028 return(0);
1029 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001030 ctxt->nodeTab[ctxt->nodeNr] = value;
1031 ctxt->node = value;
1032 return (ctxt->nodeNr++);
1033}
1034/**
1035 * nodePop:
1036 * @ctxt: an XML parser context
1037 *
1038 * Pops the top element node from the node stack
1039 *
1040 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001041 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001042xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001043nodePop(xmlParserCtxtPtr ctxt)
1044{
1045 xmlNodePtr ret;
1046
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001047 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001048 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001049 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001050 ctxt->nodeNr--;
1051 if (ctxt->nodeNr > 0)
1052 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1053 else
1054 ctxt->node = NULL;
1055 ret = ctxt->nodeTab[ctxt->nodeNr];
1056 ctxt->nodeTab[ctxt->nodeNr] = 0;
1057 return (ret);
1058}
Daniel Veillarda2351322004-06-27 12:08:10 +00001059
1060#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001061/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * nameNsPush:
1063 * @ctxt: an XML parser context
1064 * @value: the element name
1065 * @prefix: the element prefix
1066 * @URI: the element namespace name
1067 *
1068 * Pushes a new element name/prefix/URL on top of the name stack
1069 *
1070 * Returns -1 in case of error, the index in the stack otherwise
1071 */
1072static int
1073nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1074 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1075{
1076 if (ctxt->nameNr >= ctxt->nameMax) {
1077 const xmlChar * *tmp;
1078 void **tmp2;
1079 ctxt->nameMax *= 2;
1080 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1081 ctxt->nameMax *
1082 sizeof(ctxt->nameTab[0]));
1083 if (tmp == NULL) {
1084 ctxt->nameMax /= 2;
1085 goto mem_error;
1086 }
1087 ctxt->nameTab = tmp;
1088 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1089 ctxt->nameMax * 3 *
1090 sizeof(ctxt->pushTab[0]));
1091 if (tmp2 == NULL) {
1092 ctxt->nameMax /= 2;
1093 goto mem_error;
1094 }
1095 ctxt->pushTab = tmp2;
1096 }
1097 ctxt->nameTab[ctxt->nameNr] = value;
1098 ctxt->name = value;
1099 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1100 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001101 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return (ctxt->nameNr++);
1103mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001104 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 return (-1);
1106}
1107/**
1108 * nameNsPop:
1109 * @ctxt: an XML parser context
1110 *
1111 * Pops the top element/prefix/URI name from the name stack
1112 *
1113 * Returns the name just removed
1114 */
1115static const xmlChar *
1116nameNsPop(xmlParserCtxtPtr ctxt)
1117{
1118 const xmlChar *ret;
1119
1120 if (ctxt->nameNr <= 0)
1121 return (0);
1122 ctxt->nameNr--;
1123 if (ctxt->nameNr > 0)
1124 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1125 else
1126 ctxt->name = NULL;
1127 ret = ctxt->nameTab[ctxt->nameNr];
1128 ctxt->nameTab[ctxt->nameNr] = NULL;
1129 return (ret);
1130}
Daniel Veillarda2351322004-06-27 12:08:10 +00001131#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132
1133/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001134 * namePush:
1135 * @ctxt: an XML parser context
1136 * @value: the element name
1137 *
1138 * Pushes a new element name on top of the name stack
1139 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001141 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001142int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001143namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001144{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001145 if (ctxt == NULL) return (-1);
1146
Daniel Veillard1c732d22002-11-30 11:22:59 +00001147 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001149 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001151 ctxt->nameMax *
1152 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001153 if (tmp == NULL) {
1154 ctxt->nameMax /= 2;
1155 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001156 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001158 }
1159 ctxt->nameTab[ctxt->nameNr] = value;
1160 ctxt->name = value;
1161 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001163 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001165}
1166/**
1167 * namePop:
1168 * @ctxt: an XML parser context
1169 *
1170 * Pops the top element name from the name stack
1171 *
1172 * Returns the name just removed
1173 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001174const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001175namePop(xmlParserCtxtPtr ctxt)
1176{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001177 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001178
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001179 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1180 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001181 ctxt->nameNr--;
1182 if (ctxt->nameNr > 0)
1183 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1184 else
1185 ctxt->name = NULL;
1186 ret = ctxt->nameTab[ctxt->nameNr];
1187 ctxt->nameTab[ctxt->nameNr] = 0;
1188 return (ret);
1189}
Owen Taylor3473f882001-02-23 17:55:21 +00001190
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001191static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001192 if (ctxt->spaceNr >= ctxt->spaceMax) {
1193 ctxt->spaceMax *= 2;
1194 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
1195 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1196 if (ctxt->spaceTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001198 return(0);
1199 }
1200 }
1201 ctxt->spaceTab[ctxt->spaceNr] = val;
1202 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1203 return(ctxt->spaceNr++);
1204}
1205
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001206static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001207 int ret;
1208 if (ctxt->spaceNr <= 0) return(0);
1209 ctxt->spaceNr--;
1210 if (ctxt->spaceNr > 0)
1211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1212 else
1213 ctxt->space = NULL;
1214 ret = ctxt->spaceTab[ctxt->spaceNr];
1215 ctxt->spaceTab[ctxt->spaceNr] = -1;
1216 return(ret);
1217}
1218
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 *               to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1253
/*
 * All the macros below expand to code using a variable named ctxt, which
 * must be in scope at the point of use.
 */

/* RAW and CUR both read the xmlChar at the current input position. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* NXT(val): the xmlChar val positions after the current one, unchecked. */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current input pointer itself. */
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, are c1 ... cn. s is expanded once per compared byte and
 * is not parenthesized, so it should be a simple pointer expression
 * without side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advance the input pointer, the column and nbChars by val
 * without looking at the skipped bytes (the line number is not touched).
 * Then a '%' at the new position is handed to xmlParserHandlePEReference()
 * and, when the new position holds 0 and the input cannot be grown, the
 * input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that a
 * skipped '\n' increments the line number and resets the column to 1.
 * val is expanded unparenthesized inside the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK: when ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes
 * lie between the buffer base and the current position, and fewer than
 * 2 * INPUT_CHUNK bytes remain before the end, call xmlSHRINK().
 * NOTE(review): the expansion is a bare `if` which already ends with a
 * semicolon, so `SHRINK;` leaves an extra empty statement; take care when
 * using it as the body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
1304
1305static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1306 xmlParserInputShrink(ctxt->input);
1307 if ((*ctxt->input->cur == 0) &&
1308 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1309 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001310 }
Owen Taylor3473f882001-02-23 17:55:21 +00001311
/*
 * GROW: when ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes
 * remain between the current position and the end of the input, call
 * xmlGROW().
 * NOTE(review): the expansion is a bare `if` which already ends with a
 * semicolon, so `GROW;` leaves an extra empty statement; take care when
 * using it as the body of an if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
1315
1316static void xmlGROW (xmlParserCtxtPtr ctxt) {
1317 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1318 if ((*ctxt->input->cur == 0) &&
1319 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1320 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001321}
Owen Taylor3473f882001-02-23 17:55:21 +00001322
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, incrementing the column and
 * nbChars; the line number is never touched. When the new position holds
 * 0 the input is grown, it is never popped here.
 * NOTE(review): the expansion is a bare brace block, not do { } while (0),
 * so `NEXT1;` followed by `else` does not compile.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l xmlChars, counted as a single character for the
 * column; when the character being left is '\n' the line number is
 * incremented and the column reset to 1. nbChars is not updated. A '%' at
 * the new position is handed to xmlParserHandlePEReference().
 * l is expanded unparenthesized.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/*
 * CUR_CHAR(l) and CUR_SCHAR(s, l): call xmlCurrentChar() on the context,
 * respectively xmlStringCurrentChar() on the string s, passing the address
 * of l; l must therefore be an int variable.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i.
 * When l is 1, v is stored as a single xmlChar and i is incremented;
 * otherwise xmlCopyCharMultiByte() writes it and i is advanced by the
 * value it returns.
 * NOTE(review): the expansion is a bare if/else without a trailing
 * semicolon; inside an outer if/else it needs surrounding braces.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001349
1350/**
1351 * xmlSkipBlankChars:
1352 * @ctxt: the XML parser context
1353 *
1354 * skip all blanks character found at that point in the input streams.
1355 * It pops up finished entities in the process if allowable at that point.
1356 *
1357 * Returns the number of space chars skipped
1358 */
1359
1360int
1361xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001362 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001363
1364 /*
1365 * It's Okay to use CUR/NEXT here since all the blanks are on
1366 * the ASCII range.
1367 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001368 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1369 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001370 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001371 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001372 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001373 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001374 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001375 if (*cur == '\n') {
1376 ctxt->input->line++; ctxt->input->col = 1;
1377 }
1378 cur++;
1379 res++;
1380 if (*cur == 0) {
1381 ctxt->input->cur = cur;
1382 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1383 cur = ctxt->input->cur;
1384 }
1385 }
1386 ctxt->input->cur = cur;
1387 } else {
1388 int cur;
1389 do {
1390 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001391 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001392 NEXT;
1393 cur = CUR;
1394 res++;
1395 }
1396 while ((cur == 0) && (ctxt->inputNr > 1) &&
1397 (ctxt->instate != XML_PARSER_COMMENT)) {
1398 xmlPopInput(ctxt);
1399 cur = CUR;
1400 }
1401 /*
1402 * Need to handle support of entities branching here
1403 */
1404 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1405 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1406 }
Owen Taylor3473f882001-02-23 17:55:21 +00001407 return(res);
1408}
1409
1410/************************************************************************
1411 * *
1412 * Commodity functions to handle entities *
1413 * *
1414 ************************************************************************/
1415
1416/**
1417 * xmlPopInput:
1418 * @ctxt: an XML parser context
1419 *
1420 * xmlPopInput: the current input pointed by ctxt->input came to an end
1421 * pop it and return the next char.
1422 *
1423 * Returns the current xmlChar in the parser context
1424 */
1425xmlChar
1426xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001427 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001428 if (xmlParserDebugEntities)
1429 xmlGenericError(xmlGenericErrorContext,
1430 "Popping input %d\n", ctxt->inputNr);
1431 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001432 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001433 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1434 return(xmlPopInput(ctxt));
1435 return(CUR);
1436}
1437
1438/**
1439 * xmlPushInput:
1440 * @ctxt: an XML parser context
1441 * @input: an XML parser input fragment (entity, XML fragment ...).
1442 *
1443 * xmlPushInput: switch to a new input stream which is stacked on top
1444 * of the previous one(s).
1445 */
1446void
1447xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1448 if (input == NULL) return;
1449
1450 if (xmlParserDebugEntities) {
1451 if ((ctxt->input != NULL) && (ctxt->input->filename))
1452 xmlGenericError(xmlGenericErrorContext,
1453 "%s(%d): ", ctxt->input->filename,
1454 ctxt->input->line);
1455 xmlGenericError(xmlGenericErrorContext,
1456 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1457 }
1458 inputPush(ctxt, input);
1459 GROW;
1460}
1461
1462/**
1463 * xmlParseCharRef:
1464 * @ctxt: an XML parser context
1465 *
1466 * parse Reference declarations
1467 *
1468 * [66] CharRef ::= '&#' [0-9]+ ';' |
1469 * '&#x' [0-9a-fA-F]+ ';'
1470 *
1471 * [ WFC: Legal Character ]
1472 * Characters referred to using character references must match the
1473 * production for Char.
1474 *
1475 * Returns the value parsed (as an int), 0 in case of error
1476 */
1477int
1478xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001479 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001480 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001481 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001482
Owen Taylor3473f882001-02-23 17:55:21 +00001483 /*
1484 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1485 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001486 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001487 (NXT(2) == 'x')) {
1488 SKIP(3);
1489 GROW;
1490 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001491 if (count++ > 20) {
1492 count = 0;
1493 GROW;
1494 }
1495 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001496 val = val * 16 + (CUR - '0');
1497 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1498 val = val * 16 + (CUR - 'a') + 10;
1499 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1500 val = val * 16 + (CUR - 'A') + 10;
1501 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001502 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001503 val = 0;
1504 break;
1505 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001506 if (val > 0x10FFFF)
1507 outofrange = val;
1508
Owen Taylor3473f882001-02-23 17:55:21 +00001509 NEXT;
1510 count++;
1511 }
1512 if (RAW == ';') {
1513 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001514 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001515 ctxt->nbChars ++;
1516 ctxt->input->cur++;
1517 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001518 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001519 SKIP(2);
1520 GROW;
1521 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001522 if (count++ > 20) {
1523 count = 0;
1524 GROW;
1525 }
1526 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001527 val = val * 10 + (CUR - '0');
1528 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001529 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 val = 0;
1531 break;
1532 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001533 if (val > 0x10FFFF)
1534 outofrange = val;
1535
Owen Taylor3473f882001-02-23 17:55:21 +00001536 NEXT;
1537 count++;
1538 }
1539 if (RAW == ';') {
1540 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001541 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001542 ctxt->nbChars ++;
1543 ctxt->input->cur++;
1544 }
1545 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001546 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001547 }
1548
1549 /*
1550 * [ WFC: Legal Character ]
1551 * Characters referred to using character references must match the
1552 * production for Char.
1553 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001554 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001555 return(val);
1556 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001557 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1558 "xmlParseCharRef: invalid xmlChar value %d\n",
1559 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001560 }
1561 return(0);
1562}
1563
1564/**
1565 * xmlParseStringCharRef:
1566 * @ctxt: an XML parser context
1567 * @str: a pointer to an index in the string
1568 *
1569 * parse Reference declarations, variant parsing from a string rather
1570 * than an an input flow.
1571 *
1572 * [66] CharRef ::= '&#' [0-9]+ ';' |
1573 * '&#x' [0-9a-fA-F]+ ';'
1574 *
1575 * [ WFC: Legal Character ]
1576 * Characters referred to using character references must match the
1577 * production for Char.
1578 *
1579 * Returns the value parsed (as an int), 0 in case of error, str will be
1580 * updated to the current value of the index
1581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001582static int
Owen Taylor3473f882001-02-23 17:55:21 +00001583xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1584 const xmlChar *ptr;
1585 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001586 unsigned int val = 0;
1587 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001588
1589 if ((str == NULL) || (*str == NULL)) return(0);
1590 ptr = *str;
1591 cur = *ptr;
1592 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
1593 ptr += 3;
1594 cur = *ptr;
1595 while (cur != ';') { /* Non input consuming loop */
1596 if ((cur >= '0') && (cur <= '9'))
1597 val = val * 16 + (cur - '0');
1598 else if ((cur >= 'a') && (cur <= 'f'))
1599 val = val * 16 + (cur - 'a') + 10;
1600 else if ((cur >= 'A') && (cur <= 'F'))
1601 val = val * 16 + (cur - 'A') + 10;
1602 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001604 val = 0;
1605 break;
1606 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001607 if (val > 0x10FFFF)
1608 outofrange = val;
1609
Owen Taylor3473f882001-02-23 17:55:21 +00001610 ptr++;
1611 cur = *ptr;
1612 }
1613 if (cur == ';')
1614 ptr++;
1615 } else if ((cur == '&') && (ptr[1] == '#')){
1616 ptr += 2;
1617 cur = *ptr;
1618 while (cur != ';') { /* Non input consuming loops */
1619 if ((cur >= '0') && (cur <= '9'))
1620 val = val * 10 + (cur - '0');
1621 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001622 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001623 val = 0;
1624 break;
1625 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001626 if (val > 0x10FFFF)
1627 outofrange = val;
1628
Owen Taylor3473f882001-02-23 17:55:21 +00001629 ptr++;
1630 cur = *ptr;
1631 }
1632 if (cur == ';')
1633 ptr++;
1634 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001635 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001636 return(0);
1637 }
1638 *str = ptr;
1639
1640 /*
1641 * [ WFC: Legal Character ]
1642 * Characters referred to using character references must match the
1643 * production for Char.
1644 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001645 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001646 return(val);
1647 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001648 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
1649 "xmlParseStringCharRef: invalid xmlChar value %d\n",
1650 val);
Owen Taylor3473f882001-02-23 17:55:21 +00001651 }
1652 return(0);
1653}
1654
1655/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00001656 * xmlNewBlanksWrapperInputStream:
1657 * @ctxt: an XML parser context
1658 * @entity: an Entity pointer
1659 *
1660 * Create a new input stream for wrapping
1661 * blanks around a PEReference
1662 *
1663 * Returns the new input stream or NULL
1664 */
1665
1666static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
1667
Daniel Veillardf4862f02002-09-10 11:13:43 +00001668static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00001669xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1670 xmlParserInputPtr input;
1671 xmlChar *buffer;
1672 size_t length;
1673 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001674 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
1675 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00001676 return(NULL);
1677 }
1678 if (xmlParserDebugEntities)
1679 xmlGenericError(xmlGenericErrorContext,
1680 "new blanks wrapper for entity: %s\n", entity->name);
1681 input = xmlNewInputStream(ctxt);
1682 if (input == NULL) {
1683 return(NULL);
1684 }
1685 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001686 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001687 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001688 xmlErrMemory(ctxt, NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00001689 return(NULL);
1690 }
1691 buffer [0] = ' ';
1692 buffer [1] = '%';
1693 buffer [length-3] = ';';
1694 buffer [length-2] = ' ';
1695 buffer [length-1] = 0;
1696 memcpy(buffer + 2, entity->name, length - 5);
1697 input->free = deallocblankswrapper;
1698 input->base = buffer;
1699 input->cur = buffer;
1700 input->length = length;
1701 input->end = &buffer[length];
1702 return(input);
1703}
1704
1705/**
Owen Taylor3473f882001-02-23 17:55:21 +00001706 * xmlParserHandlePEReference:
1707 * @ctxt: the parser context
1708 *
1709 * [69] PEReference ::= '%' Name ';'
1710 *
1711 * [ WFC: No Recursion ]
1712 * A parsed entity must not contain a recursive
1713 * reference to itself, either directly or indirectly.
1714 *
1715 * [ WFC: Entity Declared ]
1716 * In a document without any DTD, a document with only an internal DTD
1717 * subset which contains no parameter entity references, or a document
1718 * with "standalone='yes'", ... ... The declaration of a parameter
1719 * entity must precede any reference to it...
1720 *
1721 * [ VC: Entity Declared ]
1722 * In a document with an external subset or external parameter entities
1723 * with "standalone='no'", ... ... The declaration of a parameter entity
1724 * must precede any reference to it...
1725 *
1726 * [ WFC: In DTD ]
1727 * Parameter-entity references may only appear in the DTD.
1728 * NOTE: misleading but this is handled.
1729 *
1730 * A PEReference may have been detected in the current input stream
1731 * the handling is done accordingly to
1732 * http://www.w3.org/TR/REC-xml#entproc
1733 * i.e.
1734 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001735 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00001736 */
1737void
1738xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001739 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00001740 xmlEntityPtr entity = NULL;
1741 xmlParserInputPtr input;
1742
Owen Taylor3473f882001-02-23 17:55:21 +00001743 if (RAW != '%') return;
1744 switch(ctxt->instate) {
1745 case XML_PARSER_CDATA_SECTION:
1746 return;
1747 case XML_PARSER_COMMENT:
1748 return;
1749 case XML_PARSER_START_TAG:
1750 return;
1751 case XML_PARSER_END_TAG:
1752 return;
1753 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001754 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001755 return;
1756 case XML_PARSER_PROLOG:
1757 case XML_PARSER_START:
1758 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001759 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001760 return;
1761 case XML_PARSER_ENTITY_DECL:
1762 case XML_PARSER_CONTENT:
1763 case XML_PARSER_ATTRIBUTE_VALUE:
1764 case XML_PARSER_PI:
1765 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00001766 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00001767 /* we just ignore it there */
1768 return;
1769 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001770 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001771 return;
1772 case XML_PARSER_ENTITY_VALUE:
1773 /*
1774 * NOTE: in the case of entity values, we don't do the
1775 * substitution here since we need the literal
1776 * entity value to be able to save the internal
1777 * subset of the document.
1778 * This will be handled by xmlStringDecodeEntities
1779 */
1780 return;
1781 case XML_PARSER_DTD:
1782 /*
1783 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1784 * In the internal DTD subset, parameter-entity references
1785 * can occur only where markup declarations can occur, not
1786 * within markup declarations.
1787 * In that case this is handled in xmlParseMarkupDecl
1788 */
1789 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1790 return;
William M. Brack76e95df2003-10-18 16:20:14 +00001791 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00001792 return;
Owen Taylor3473f882001-02-23 17:55:21 +00001793 break;
1794 case XML_PARSER_IGNORE:
1795 return;
1796 }
1797
1798 NEXT;
1799 name = xmlParseName(ctxt);
1800 if (xmlParserDebugEntities)
1801 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001802 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001803 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001804 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001805 } else {
1806 if (RAW == ';') {
1807 NEXT;
1808 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1809 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1810 if (entity == NULL) {
1811
1812 /*
1813 * [ WFC: Entity Declared ]
1814 * In a document without any DTD, a document with only an
1815 * internal DTD subset which contains no parameter entity
1816 * references, or a document with "standalone='yes'", ...
1817 * ... The declaration of a parameter entity must precede
1818 * any reference to it...
1819 */
1820 if ((ctxt->standalone == 1) ||
1821 ((ctxt->hasExternalSubset == 0) &&
1822 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001823 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00001824 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00001825 } else {
1826 /*
1827 * [ VC: Entity Declared ]
1828 * In a document with an external subset or external
1829 * parameter entities with "standalone='no'", ...
1830 * ... The declaration of a parameter entity must precede
1831 * any reference to it...
1832 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00001833 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
1834 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
1835 "PEReference: %%%s; not found\n",
1836 name);
1837 } else
1838 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
1839 "PEReference: %%%s; not found\n",
1840 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001841 ctxt->valid = 0;
1842 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00001843 } else if (ctxt->input->free != deallocblankswrapper) {
1844 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
1845 xmlPushInput(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001846 } else {
1847 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
1848 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00001849 xmlChar start[4];
1850 xmlCharEncoding enc;
1851
Owen Taylor3473f882001-02-23 17:55:21 +00001852 /*
1853 * handle the extra spaces added before and after
1854 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001855 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00001856 */
1857 input = xmlNewEntityInputStream(ctxt, entity);
1858 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +00001859
1860 /*
1861 * Get the 4 first bytes and decode the charset
1862 * if enc != XML_CHAR_ENCODING_NONE
1863 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00001864 * Note that, since we may have some non-UTF8
1865 * encoding (like UTF16, bug 135229), the 'length'
1866 * is not known, but we can calculate based upon
1867 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00001868 */
1869 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00001870 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00001871 start[0] = RAW;
1872 start[1] = NXT(1);
1873 start[2] = NXT(2);
1874 start[3] = NXT(3);
1875 enc = xmlDetectCharEncoding(start, 4);
1876 if (enc != XML_CHAR_ENCODING_NONE) {
1877 xmlSwitchEncoding(ctxt, enc);
1878 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00001879 }
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00001882 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
1883 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00001884 xmlParseTextDecl(ctxt);
1885 }
Owen Taylor3473f882001-02-23 17:55:21 +00001886 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00001887 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
1888 "PEReference: %s is not a parameter entity\n",
1889 name);
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 }
1892 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001893 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001894 }
Owen Taylor3473f882001-02-23 17:55:21 +00001895 }
1896}
1897
/*
 * growBuffer:
 * @buffer:  name of an xmlChar * variable; a companion integer variable
 *           named <buffer>_size must be in scope and hold its capacity
 *
 * Double the capacity of @buffer through xmlRealloc(). On allocation
 * failure control jumps to a "mem_error" label that the calling function
 * has to provide; @buffer then still points to the original block.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown;                                                     \
    buffer##_size *= 2;                                                 \
    grown = (xmlChar *)                                                 \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (grown == NULL) goto mem_error;                                  \
    buffer = grown;                                                     \
}
1909
1910/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00001911 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00001912 * @ctxt: the parser context
1913 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00001915 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1916 * @end: an end marker xmlChar, 0 if none
1917 * @end2: an end marker xmlChar, 0 if none
1918 * @end3: an end marker xmlChar, 0 if none
1919 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001920 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00001921 *
1922 * [67] Reference ::= EntityRef | CharRef
1923 *
1924 * [69] PEReference ::= '%' Name ';'
1925 *
1926 * Returns A newly allocated string with the substitution done. The caller
1927 * must deallocate it !
1928 */
1929xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001930xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
1931 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 xmlChar *buffer = NULL;
1933 int buffer_size = 0;
1934
1935 xmlChar *current = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001936 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00001937 xmlEntityPtr ent;
1938 int c,l;
1939 int nbchars = 0;
1940
Daniel Veillarda82b1822004-11-08 16:24:57 +00001941 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00001942 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001943 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00001944
1945 if (ctxt->depth > 40) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001946 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001947 return(NULL);
1948 }
1949
1950 /*
1951 * allocate a translation buffer.
1952 */
1953 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00001954 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001955 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00001956
1957 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001958 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00001959 * we are operating on already parsed values.
1960 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001961 if (str < last)
1962 c = CUR_SCHAR(str, l);
1963 else
1964 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 while ((c != 0) && (c != end) && /* non input consuming loop */
1966 (c != end2) && (c != end3)) {
1967
1968 if (c == 0) break;
1969 if ((c == '&') && (str[1] == '#')) {
1970 int val = xmlParseStringCharRef(ctxt, &str);
1971 if (val != 0) {
1972 COPY_BUF(0,buffer,nbchars,val);
1973 }
1974 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
1975 if (xmlParserDebugEntities)
1976 xmlGenericError(xmlGenericErrorContext,
1977 "String decoding Entity Reference: %.30s\n",
1978 str);
1979 ent = xmlParseStringEntityRef(ctxt, &str);
1980 if ((ent != NULL) &&
1981 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
1982 if (ent->content != NULL) {
1983 COPY_BUF(0,buffer,nbchars,ent->content[0]);
1984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00001985 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
1986 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001987 }
1988 } else if ((ent != NULL) && (ent->content != NULL)) {
1989 xmlChar *rep;
1990
1991 ctxt->depth++;
1992 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1993 0, 0, 0);
1994 ctxt->depth--;
1995 if (rep != NULL) {
1996 current = rep;
1997 while (*current != 0) { /* non input consuming loop */
1998 buffer[nbchars++] = *current++;
1999 if (nbchars >
2000 buffer_size - XML_PARSER_BUFFER_SIZE) {
2001 growBuffer(buffer);
2002 }
2003 }
2004 xmlFree(rep);
2005 }
2006 } else if (ent != NULL) {
2007 int i = xmlStrlen(ent->name);
2008 const xmlChar *cur = ent->name;
2009
2010 buffer[nbchars++] = '&';
2011 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2012 growBuffer(buffer);
2013 }
2014 for (;i > 0;i--)
2015 buffer[nbchars++] = *cur++;
2016 buffer[nbchars++] = ';';
2017 }
2018 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2019 if (xmlParserDebugEntities)
2020 xmlGenericError(xmlGenericErrorContext,
2021 "String decoding PE Reference: %.30s\n", str);
2022 ent = xmlParseStringPEReference(ctxt, &str);
2023 if (ent != NULL) {
2024 xmlChar *rep;
2025
2026 ctxt->depth++;
2027 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2028 0, 0, 0);
2029 ctxt->depth--;
2030 if (rep != NULL) {
2031 current = rep;
2032 while (*current != 0) { /* non input consuming loop */
2033 buffer[nbchars++] = *current++;
2034 if (nbchars >
2035 buffer_size - XML_PARSER_BUFFER_SIZE) {
2036 growBuffer(buffer);
2037 }
2038 }
2039 xmlFree(rep);
2040 }
2041 }
2042 } else {
2043 COPY_BUF(l,buffer,nbchars,c);
2044 str += l;
2045 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2046 growBuffer(buffer);
2047 }
2048 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002049 if (str < last)
2050 c = CUR_SCHAR(str, l);
2051 else
2052 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053 }
2054 buffer[nbchars++] = 0;
2055 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002056
2057mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002058 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002059 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002060}
2061
Daniel Veillarde57ec792003-09-10 10:50:59 +00002062/**
2063 * xmlStringDecodeEntities:
2064 * @ctxt: the parser context
2065 * @str: the input string
2066 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2067 * @end: an end marker xmlChar, 0 if none
2068 * @end2: an end marker xmlChar, 0 if none
2069 * @end3: an end marker xmlChar, 0 if none
2070 *
2071 * Takes a entity string content and process to do the adequate substitutions.
2072 *
2073 * [67] Reference ::= EntityRef | CharRef
2074 *
2075 * [69] PEReference ::= '%' Name ';'
2076 *
2077 * Returns A newly allocated string with the substitution done. The caller
2078 * must deallocate it !
2079 */
2080xmlChar *
2081xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2082 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002083 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002084 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2085 end, end2, end3));
2086}
Owen Taylor3473f882001-02-23 17:55:21 +00002087
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2093
2094/**
2095 * areBlanks:
2096 * @ctxt: an XML parser context
2097 * @str: a xmlChar *
2098 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002099 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002100 *
2101 * Is this a sequence of blank chars that one can ignore ?
2102 *
2103 * Returns 1 if ignorable 0 otherwise.
2104 */
2105
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002106static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2107 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002108 int i, ret;
2109 xmlNodePtr lastChild;
2110
Daniel Veillard05c13a22001-09-09 08:38:09 +00002111 /*
2112 * Don't spend time trying to differentiate them, the same callback is
2113 * used !
2114 */
2115 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002116 return(0);
2117
Owen Taylor3473f882001-02-23 17:55:21 +00002118 /*
2119 * Check for xml:space value.
2120 */
2121 if (*(ctxt->space) == 1)
2122 return(0);
2123
2124 /*
2125 * Check that the string is made of blanks
2126 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002127 if (blank_chars == 0) {
2128 for (i = 0;i < len;i++)
2129 if (!(IS_BLANK_CH(str[i]))) return(0);
2130 }
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002133 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002134 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002135 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002136 if (ctxt->myDoc != NULL) {
2137 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2138 if (ret == 0) return(1);
2139 if (ret == 1) return(0);
2140 }
2141
2142 /*
2143 * Otherwise, heuristic :-\
2144 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002145 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002146 if ((ctxt->node->children == NULL) &&
2147 (RAW == '<') && (NXT(1) == '/')) return(0);
2148
2149 lastChild = xmlGetLastChild(ctxt->node);
2150 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002151 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2152 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002153 } else if (xmlNodeIsText(lastChild))
2154 return(0);
2155 else if ((ctxt->node->children != NULL) &&
2156 (xmlNodeIsText(ctxt->node->children)))
2157 return(0);
2158 return(1);
2159}
2160
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *              Relates to http://www.w3.org/TR/WD-xml-names            *
 *                                                                      *
 ************************************************************************/
2167
2168/**
2169 * xmlSplitQName:
2170 * @ctxt: an XML parser context
2171 * @name: an XML parser context
2172 * @prefix: a xmlChar **
2173 *
2174 * parse an UTF8 encoded XML qualified name string
2175 *
2176 * [NS 5] QName ::= (Prefix ':')? LocalPart
2177 *
2178 * [NS 6] Prefix ::= NCName
2179 *
2180 * [NS 7] LocalPart ::= NCName
2181 *
2182 * Returns the local part, and prefix is updated
2183 * to get the Prefix if any.
2184 */
2185
2186xmlChar *
2187xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2188 xmlChar buf[XML_MAX_NAMELEN + 5];
2189 xmlChar *buffer = NULL;
2190 int len = 0;
2191 int max = XML_MAX_NAMELEN;
2192 xmlChar *ret = NULL;
2193 const xmlChar *cur = name;
2194 int c;
2195
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002196 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002197 *prefix = NULL;
2198
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002199 if (cur == NULL) return(NULL);
2200
Owen Taylor3473f882001-02-23 17:55:21 +00002201#ifndef XML_XML_NAMESPACE
2202 /* xml: prefix is not really a namespace */
2203 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2204 (cur[2] == 'l') && (cur[3] == ':'))
2205 return(xmlStrdup(name));
2206#endif
2207
Daniel Veillard597bc482003-07-24 16:08:28 +00002208 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002209 if (cur[0] == ':')
2210 return(xmlStrdup(name));
2211
2212 c = *cur++;
2213 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2214 buf[len++] = c;
2215 c = *cur++;
2216 }
2217 if (len >= max) {
2218 /*
2219 * Okay someone managed to make a huge name, so he's ready to pay
2220 * for the processing speed.
2221 */
2222 max = len * 2;
2223
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002224 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002225 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002227 return(NULL);
2228 }
2229 memcpy(buffer, buf, len);
2230 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2231 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002232 xmlChar *tmp;
2233
Owen Taylor3473f882001-02-23 17:55:21 +00002234 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002235 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002236 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002237 if (tmp == NULL) {
2238 xmlFree(tmp);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 return(NULL);
2241 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002242 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002243 }
2244 buffer[len++] = c;
2245 c = *cur++;
2246 }
2247 buffer[len] = 0;
2248 }
2249
Daniel Veillard597bc482003-07-24 16:08:28 +00002250 /* nasty but well=formed
2251 if ((c == ':') && (*cur == 0)) {
2252 return(xmlStrdup(name));
2253 } */
2254
Owen Taylor3473f882001-02-23 17:55:21 +00002255 if (buffer == NULL)
2256 ret = xmlStrndup(buf, len);
2257 else {
2258 ret = buffer;
2259 buffer = NULL;
2260 max = XML_MAX_NAMELEN;
2261 }
2262
2263
2264 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002265 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002267 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002268 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002269 }
Owen Taylor3473f882001-02-23 17:55:21 +00002270 len = 0;
2271
Daniel Veillardbb284f42002-10-16 18:02:47 +00002272 /*
2273 * Check that the first character is proper to start
2274 * a new name
2275 */
2276 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2277 ((c >= 0x41) && (c <= 0x5A)) ||
2278 (c == '_') || (c == ':'))) {
2279 int l;
2280 int first = CUR_SCHAR(cur, l);
2281
2282 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002283 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002284 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002285 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002286 }
2287 }
2288 cur++;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2291 buf[len++] = c;
2292 c = *cur++;
2293 }
2294 if (len >= max) {
2295 /*
2296 * Okay someone managed to make a huge name, so he's ready to pay
2297 * for the processing speed.
2298 */
2299 max = len * 2;
2300
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002301 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002302 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 return(NULL);
2305 }
2306 memcpy(buffer, buf, len);
2307 while (c != 0) { /* tested bigname2.xml */
2308 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002309 xmlChar *tmp;
2310
Owen Taylor3473f882001-02-23 17:55:21 +00002311 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002312 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002313 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002314 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002316 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002317 return(NULL);
2318 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002319 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002320 }
2321 buffer[len++] = c;
2322 c = *cur++;
2323 }
2324 buffer[len] = 0;
2325 }
2326
2327 if (buffer == NULL)
2328 ret = xmlStrndup(buf, len);
2329 else {
2330 ret = buffer;
2331 }
2332 }
2333
2334 return(ret);
2335}
2336
2337/************************************************************************
2338 * *
2339 * The parser itself *
2340 * Relates to http://www.w3.org/TR/REC-xml *
2341 * *
2342 ************************************************************************/
2343
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002344static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002345static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002346 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002347
Owen Taylor3473f882001-02-23 17:55:21 +00002348/**
2349 * xmlParseName:
2350 * @ctxt: an XML parser context
2351 *
2352 * parse an XML name.
2353 *
2354 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2355 * CombiningChar | Extender
2356 *
2357 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2358 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002359 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002360 *
2361 * Returns the Name parsed or NULL
2362 */
2363
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002364const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002365xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002366 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002367 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00002368 int count = 0;
2369
2370 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002371
2372 /*
2373 * Accelerator for simple ASCII names
2374 */
2375 in = ctxt->input->cur;
2376 if (((*in >= 0x61) && (*in <= 0x7A)) ||
2377 ((*in >= 0x41) && (*in <= 0x5A)) ||
2378 (*in == '_') || (*in == ':')) {
2379 in++;
2380 while (((*in >= 0x61) && (*in <= 0x7A)) ||
2381 ((*in >= 0x41) && (*in <= 0x5A)) ||
2382 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00002383 (*in == '_') || (*in == '-') ||
2384 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002385 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00002386 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002387 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002389 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00002390 ctxt->nbChars += count;
2391 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002392 if (ret == NULL)
2393 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002394 return(ret);
2395 }
2396 }
Daniel Veillard2f362242001-03-02 17:36:21 +00002397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00002398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00002399
Daniel Veillard46de64e2002-05-29 08:21:33 +00002400/**
2401 * xmlParseNameAndCompare:
2402 * @ctxt: an XML parser context
2403 *
2404 * parse an XML name and compares for match
2405 * (specialized for endtag parsing)
2406 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002407 * Returns NULL for an illegal name, (xmlChar*) 1 for success
2408 * and the name for mismatch
2409 */
2410
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002411static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00002412xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002413 register const xmlChar *cmp = other;
2414 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002415 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002416
2417 GROW;
2418
2419 in = ctxt->input->cur;
2420 while (*in != 0 && *in == *cmp) {
2421 ++in;
2422 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00002423 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002424 }
William M. Brack76e95df2003-10-18 16:20:14 +00002425 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard46de64e2002-05-29 08:21:33 +00002426 /* success */
2427 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002428 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002429 }
2430 /* failure (or end of input buffer), check with full function */
2431 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002432 /* strings coming from the dictionnary direct compare possible */
2433 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002434 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00002435 }
2436 return ret;
2437}
2438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002439static const xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00002440xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
Daniel Veillard21a0f912001-02-25 19:54:14 +00002441 int len = 0, l;
2442 int c;
2443 int count = 0;
2444
2445 /*
2446 * Handler for more complex cases
2447 */
2448 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00002449 c = CUR_CHAR(l);
2450 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2451 (!IS_LETTER(c) && (c != '_') &&
2452 (c != ':'))) {
2453 return(NULL);
2454 }
2455
2456 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00002457 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Owen Taylor3473f882001-02-23 17:55:21 +00002458 (c == '.') || (c == '-') ||
2459 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002460 (IS_COMBINING(c)) ||
2461 (IS_EXTENDER(c)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002462 if (count++ > 100) {
2463 count = 0;
2464 GROW;
2465 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002466 len += l;
Owen Taylor3473f882001-02-23 17:55:21 +00002467 NEXTL(l);
2468 c = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00002469 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002470 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
Owen Taylor3473f882001-02-23 17:55:21 +00002471}
2472
2473/**
2474 * xmlParseStringName:
2475 * @ctxt: an XML parser context
2476 * @str: a pointer to the string pointer (IN/OUT)
2477 *
2478 * parse an XML name.
2479 *
2480 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2481 * CombiningChar | Extender
2482 *
2483 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2484 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002485 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00002486 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002487 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * is updated to the current location in the string.
2489 */
2490
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002491static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00002492xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
2493 xmlChar buf[XML_MAX_NAMELEN + 5];
2494 const xmlChar *cur = *str;
2495 int len = 0, l;
2496 int c;
2497
2498 c = CUR_SCHAR(cur, l);
William M. Brack871611b2003-10-18 04:53:14 +00002499 if (!IS_LETTER(c) && (c != '_') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002500 (c != ':')) {
2501 return(NULL);
2502 }
2503
William M. Brack871611b2003-10-18 04:53:14 +00002504 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002505 (c == '.') || (c == '-') ||
2506 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002507 (IS_COMBINING(c)) ||
2508 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002509 COPY_BUF(l,buf,len,c);
2510 cur += l;
2511 c = CUR_SCHAR(cur, l);
2512 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
2513 /*
2514 * Okay someone managed to make a huge name, so he's ready to pay
2515 * for the processing speed.
2516 */
2517 xmlChar *buffer;
2518 int max = len * 2;
2519
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002520 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002521 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002522 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002523 return(NULL);
2524 }
2525 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002526 while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard73b013f2003-09-30 12:36:01 +00002527 /* test bigentname.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002528 (c == '.') || (c == '-') ||
2529 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002530 (IS_COMBINING(c)) ||
2531 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002533 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002534 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002535 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002536 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002537 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002538 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002539 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002540 return(NULL);
2541 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002542 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002543 }
2544 COPY_BUF(l,buffer,len,c);
2545 cur += l;
2546 c = CUR_SCHAR(cur, l);
2547 }
2548 buffer[len] = 0;
2549 *str = cur;
2550 return(buffer);
2551 }
2552 }
2553 *str = cur;
2554 return(xmlStrndup(buf, len));
2555}
2556
2557/**
2558 * xmlParseNmtoken:
2559 * @ctxt: an XML parser context
2560 *
2561 * parse an XML Nmtoken.
2562 *
2563 * [7] Nmtoken ::= (NameChar)+
2564 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00002565 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00002566 *
2567 * Returns the Nmtoken parsed or NULL
2568 */
2569
2570xmlChar *
2571xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
2572 xmlChar buf[XML_MAX_NAMELEN + 5];
2573 int len = 0, l;
2574 int c;
2575 int count = 0;
2576
2577 GROW;
2578 c = CUR_CHAR(l);
2579
William M. Brack871611b2003-10-18 04:53:14 +00002580 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002581 (c == '.') || (c == '-') ||
2582 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002583 (IS_COMBINING(c)) ||
2584 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002585 if (count++ > 100) {
2586 count = 0;
2587 GROW;
2588 }
2589 COPY_BUF(l,buf,len,c);
2590 NEXTL(l);
2591 c = CUR_CHAR(l);
2592 if (len >= XML_MAX_NAMELEN) {
2593 /*
2594 * Okay someone managed to make a huge token, so he's ready to pay
2595 * for the processing speed.
2596 */
2597 xmlChar *buffer;
2598 int max = len * 2;
2599
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
2604 }
2605 memcpy(buffer, buf, len);
William M. Brack871611b2003-10-18 04:53:14 +00002606 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Owen Taylor3473f882001-02-23 17:55:21 +00002607 (c == '.') || (c == '-') ||
2608 (c == '_') || (c == ':') ||
William M. Brack871611b2003-10-18 04:53:14 +00002609 (IS_COMBINING(c)) ||
2610 (IS_EXTENDER(c))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002611 if (count++ > 100) {
2612 count = 0;
2613 GROW;
2614 }
2615 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002616 xmlChar *tmp;
2617
Owen Taylor3473f882001-02-23 17:55:21 +00002618 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002619 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002620 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002621 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002622 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002623 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002624 return(NULL);
2625 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002626 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002627 }
2628 COPY_BUF(l,buffer,len,c);
2629 NEXTL(l);
2630 c = CUR_CHAR(l);
2631 }
2632 buffer[len] = 0;
2633 return(buffer);
2634 }
2635 }
2636 if (len == 0)
2637 return(NULL);
2638 return(xmlStrndup(buf, len));
2639}
2640
2641/**
2642 * xmlParseEntityValue:
2643 * @ctxt: an XML parser context
2644 * @orig: if non-NULL store a copy of the original entity value
2645 *
2646 * parse a value for ENTITY declarations
2647 *
2648 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2649 * "'" ([^%&'] | PEReference | Reference)* "'"
2650 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002651 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002652 */
2653
2654xmlChar *
2655xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2656 xmlChar *buf = NULL;
2657 int len = 0;
2658 int size = XML_PARSER_BUFFER_SIZE;
2659 int c, l;
2660 xmlChar stop;
2661 xmlChar *ret = NULL;
2662 const xmlChar *cur = NULL;
2663 xmlParserInputPtr input;
2664
2665 if (RAW == '"') stop = '"';
2666 else if (RAW == '\'') stop = '\'';
2667 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002668 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002669 return(NULL);
2670 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002671 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002672 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002673 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 return(NULL);
2675 }
2676
2677 /*
2678 * The content of the entity definition is copied in a buffer.
2679 */
2680
2681 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2682 input = ctxt->input;
2683 GROW;
2684 NEXT;
2685 c = CUR_CHAR(l);
2686 /*
2687 * NOTE: 4.4.5 Included in Literal
2688 * When a parameter entity reference appears in a literal entity
2689 * value, ... a single or double quote character in the replacement
2690 * text is always treated as a normal data character and will not
2691 * terminate the literal.
2692 * In practice it means we stop the loop only when back at parsing
2693 * the initial entity and the quote is found
2694 */
William M. Brack871611b2003-10-18 04:53:14 +00002695 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00002696 (ctxt->input != input))) {
2697 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002698 xmlChar *tmp;
2699
Owen Taylor3473f882001-02-23 17:55:21 +00002700 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002701 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2702 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002703 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002704 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00002705 return(NULL);
2706 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002707 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002708 }
2709 COPY_BUF(l,buf,len,c);
2710 NEXTL(l);
2711 /*
2712 * Pop-up of finished entities.
2713 */
2714 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2715 xmlPopInput(ctxt);
2716
2717 GROW;
2718 c = CUR_CHAR(l);
2719 if (c == 0) {
2720 GROW;
2721 c = CUR_CHAR(l);
2722 }
2723 }
2724 buf[len] = 0;
2725
2726 /*
2727 * Raise problem w.r.t. '&' and '%' being used in non-entities
2728 * reference constructs. Note Charref will be handled in
2729 * xmlStringDecodeEntities()
2730 */
2731 cur = buf;
2732 while (*cur != 0) { /* non input consuming */
2733 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2734 xmlChar *name;
2735 xmlChar tmp = *cur;
2736
2737 cur++;
2738 name = xmlParseStringName(ctxt, &cur);
2739 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002740 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00002741 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002742 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00002743 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002744 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2745 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002746 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
2748 if (name != NULL)
2749 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00002750 if (*cur == 0)
2751 break;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 cur++;
2754 }
2755
2756 /*
2757 * Then PEReference entities are substituted.
2758 */
2759 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 xmlFree(buf);
2762 } else {
2763 NEXT;
2764 /*
2765 * NOTE: 4.4.7 Bypassed
2766 * When a general entity reference appears in the EntityValue in
2767 * an entity declaration, it is bypassed and left as is.
2768 * so XML_SUBSTITUTE_REF is not set here.
2769 */
2770 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2771 0, 0, 0);
2772 if (orig != NULL)
2773 *orig = buf;
2774 else
2775 xmlFree(buf);
2776 }
2777
2778 return(ret);
2779}
2780
2781/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00002782 * xmlParseAttValueComplex:
2783 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00002784 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00002786 *
2787 * parse a value for an attribute, this is the fallback function
2788 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002789 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00002790 *
2791 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2792 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00002793static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002794xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00002795 xmlChar limit = 0;
2796 xmlChar *buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002797 int len = 0;
2798 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002799 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002800 xmlChar *current = NULL;
2801 xmlEntityPtr ent;
2802
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if (NXT(0) == '"') {
2804 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2805 limit = '"';
2806 NEXT;
2807 } else if (NXT(0) == '\'') {
2808 limit = '\'';
2809 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2810 NEXT;
2811 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
2815
2816 /*
2817 * allocate a translation buffer.
2818 */
2819 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002820 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002821 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002822
2823 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002824 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002825 */
2826 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00002827 while ((NXT(0) != limit) && /* checked */
2828 (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002829 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00002830 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00002831 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (NXT(1) == '#') {
2833 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002834
Owen Taylor3473f882001-02-23 17:55:21 +00002835 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002836 if (ctxt->replaceEntities) {
2837 if (len > buf_size - 10) {
2838 growBuffer(buf);
2839 }
2840 buf[len++] = '&';
2841 } else {
2842 /*
2843 * The reparsing will be done in xmlStringGetNodeList()
2844 * called by the attribute() function in SAX.c
2845 */
Daniel Veillard319a7422001-09-11 09:27:09 +00002846 if (len > buf_size - 10) {
2847 growBuffer(buf);
2848 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002849 buf[len++] = '&';
2850 buf[len++] = '#';
2851 buf[len++] = '3';
2852 buf[len++] = '8';
2853 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00002854 }
2855 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002856 if (len > buf_size - 10) {
2857 growBuffer(buf);
2858 }
Owen Taylor3473f882001-02-23 17:55:21 +00002859 len += xmlCopyChar(0, &buf[len], val);
2860 }
2861 } else {
2862 ent = xmlParseEntityRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002863 if ((ent != NULL) &&
2864 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2865 if (len > buf_size - 10) {
2866 growBuffer(buf);
2867 }
2868 if ((ctxt->replaceEntities == 0) &&
2869 (ent->content[0] == '&')) {
2870 buf[len++] = '&';
2871 buf[len++] = '#';
2872 buf[len++] = '3';
2873 buf[len++] = '8';
2874 buf[len++] = ';';
2875 } else {
2876 buf[len++] = ent->content[0];
2877 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002878 } else if ((ent != NULL) &&
2879 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002880 xmlChar *rep;
2881
2882 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2883 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002884 XML_SUBSTITUTE_REF,
2885 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 if (rep != NULL) {
2887 current = rep;
2888 while (*current != 0) { /* non input consuming */
2889 buf[len++] = *current++;
2890 if (len > buf_size - 10) {
2891 growBuffer(buf);
2892 }
2893 }
2894 xmlFree(rep);
2895 }
2896 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002897 if (len > buf_size - 10) {
2898 growBuffer(buf);
2899 }
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (ent->content != NULL)
2901 buf[len++] = ent->content[0];
2902 }
2903 } else if (ent != NULL) {
2904 int i = xmlStrlen(ent->name);
2905 const xmlChar *cur = ent->name;
2906
2907 /*
2908 * This may look absurd but is needed to detect
2909 * entities problems
2910 */
2911 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2912 (ent->content != NULL)) {
2913 xmlChar *rep;
2914 rep = xmlStringDecodeEntities(ctxt, ent->content,
2915 XML_SUBSTITUTE_REF, 0, 0, 0);
2916 if (rep != NULL)
2917 xmlFree(rep);
2918 }
2919
2920 /*
2921 * Just output the reference
2922 */
2923 buf[len++] = '&';
2924 if (len > buf_size - i - 10) {
2925 growBuffer(buf);
2926 }
2927 for (;i > 0;i--)
2928 buf[len++] = *cur++;
2929 buf[len++] = ';';
2930 }
2931 }
2932 } else {
2933 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002934 if ((len != 0) || (!normalize)) {
2935 if ((!normalize) || (!in_space)) {
2936 COPY_BUF(l,buf,len,0x20);
2937 if (len > buf_size - 10) {
2938 growBuffer(buf);
2939 }
2940 }
2941 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00002942 }
2943 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002944 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002945 COPY_BUF(l,buf,len,c);
2946 if (len > buf_size - 10) {
2947 growBuffer(buf);
2948 }
2949 }
2950 NEXTL(l);
2951 }
2952 GROW;
2953 c = CUR_CHAR(l);
2954 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002955 if ((in_space) && (normalize)) {
2956 while (buf[len - 1] == 0x20) len--;
2957 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002958 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002959 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 } else if (RAW != limit) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002962 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
2963 "AttValue: ' expected\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002964 } else
2965 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00002966 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002968
2969mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002970 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002971 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002972}
2973
2974/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00002975 * xmlParseAttValue:
2976 * @ctxt: an XML parser context
2977 *
2978 * parse a value for an attribute
2979 * Note: the parser won't do substitution of entities here, this
2980 * will be handled later in xmlStringGetNodeList
2981 *
2982 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2983 * "'" ([^<&'] | Reference)* "'"
2984 *
2985 * 3.3.3 Attribute-Value Normalization:
2986 * Before the value of an attribute is passed to the application or
2987 * checked for validity, the XML processor must normalize it as follows:
2988 * - a character reference is processed by appending the referenced
2989 * character to the attribute value
2990 * - an entity reference is processed by recursively processing the
2991 * replacement text of the entity
2992 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2993 * appending #x20 to the normalized value, except that only a single
2994 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2995 * parsed entity or the literal entity value of an internal parsed entity
2996 * - other characters are processed by appending them to the normalized value
2997 * If the declared value is not CDATA, then the XML processor must further
2998 * process the normalized attribute value by discarding any leading and
2999 * trailing space (#x20) characters, and by replacing sequences of space
3000 * (#x20) characters by a single space (#x20) character.
3001 * All attributes for which no declaration has been read should be treated
3002 * by a non-validating parser as if declared CDATA.
3003 *
3004 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3005 */
3006
3007
3008xmlChar *
3009xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003010 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003011 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003012}
3013
3014/**
Owen Taylor3473f882001-02-23 17:55:21 +00003015 * xmlParseSystemLiteral:
3016 * @ctxt: an XML parser context
3017 *
3018 * parse an XML Literal
3019 *
3020 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3021 *
3022 * Returns the SystemLiteral parsed or NULL
3023 */
3024
3025xmlChar *
3026xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3027 xmlChar *buf = NULL;
3028 int len = 0;
3029 int size = XML_PARSER_BUFFER_SIZE;
3030 int cur, l;
3031 xmlChar stop;
3032 int state = ctxt->instate;
3033 int count = 0;
3034
3035 SHRINK;
3036 if (RAW == '"') {
3037 NEXT;
3038 stop = '"';
3039 } else if (RAW == '\'') {
3040 NEXT;
3041 stop = '\'';
3042 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003043 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003044 return(NULL);
3045 }
3046
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003048 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003049 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003050 return(NULL);
3051 }
3052 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3053 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003054 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003055 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003056 xmlChar *tmp;
3057
Owen Taylor3473f882001-02-23 17:55:21 +00003058 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003059 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3060 if (tmp == NULL) {
3061 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003063 ctxt->instate = (xmlParserInputState) state;
3064 return(NULL);
3065 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003066 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003067 }
3068 count++;
3069 if (count > 50) {
3070 GROW;
3071 count = 0;
3072 }
3073 COPY_BUF(l,buf,len,cur);
3074 NEXTL(l);
3075 cur = CUR_CHAR(l);
3076 if (cur == 0) {
3077 GROW;
3078 SHRINK;
3079 cur = CUR_CHAR(l);
3080 }
3081 }
3082 buf[len] = 0;
3083 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003084 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003085 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003086 } else {
3087 NEXT;
3088 }
3089 return(buf);
3090}
3091
3092/**
3093 * xmlParsePubidLiteral:
3094 * @ctxt: an XML parser context
3095 *
3096 * parse an XML public literal
3097 *
3098 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3099 *
3100 * Returns the PubidLiteral parsed or NULL.
3101 */
3102
3103xmlChar *
3104xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3105 xmlChar *buf = NULL;
3106 int len = 0;
3107 int size = XML_PARSER_BUFFER_SIZE;
3108 xmlChar cur;
3109 xmlChar stop;
3110 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003111 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003112
3113 SHRINK;
3114 if (RAW == '"') {
3115 NEXT;
3116 stop = '"';
3117 } else if (RAW == '\'') {
3118 NEXT;
3119 stop = '\'';
3120 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003121 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003122 return(NULL);
3123 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003124 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003125 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003126 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003127 return(NULL);
3128 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003129 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003130 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003131 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003132 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003133 xmlChar *tmp;
3134
Owen Taylor3473f882001-02-23 17:55:21 +00003135 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003136 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3137 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003138 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003139 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003140 return(NULL);
3141 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003142 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003143 }
3144 buf[len++] = cur;
3145 count++;
3146 if (count > 50) {
3147 GROW;
3148 count = 0;
3149 }
3150 NEXT;
3151 cur = CUR;
3152 if (cur == 0) {
3153 GROW;
3154 SHRINK;
3155 cur = CUR;
3156 }
3157 }
3158 buf[len] = 0;
3159 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003160 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003161 } else {
3162 NEXT;
3163 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003164 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 return(buf);
3166}
3167
Daniel Veillard48b2f892001-02-25 16:11:03 +00003168void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00003169/**
3170 * xmlParseCharData:
3171 * @ctxt: an XML parser context
3172 * @cdata: int indicating whether we are within a CDATA section
3173 *
3174 * parse a CharData section.
3175 * if we are within a CDATA section ']]>' marks an end of section.
3176 *
3177 * The right angle bracket (>) may be represented using the string "&gt;",
3178 * and must, for compatibility, be escaped using "&gt;" or a character
3179 * reference when it appears in the string "]]>" in content, when that
3180 * string is not marking the end of a CDATA section.
3181 *
3182 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3183 */
3184
3185void
3186xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003187 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003188 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003189 int line = ctxt->input->line;
3190 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003191 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003192
3193 SHRINK;
3194 GROW;
3195 /*
3196 * Accelerated common case where input don't need to be
3197 * modified before passing it to the handler.
3198 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003199 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003200 in = ctxt->input->cur;
3201 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003202get_more_space:
3203 while (*in == 0x20) in++;
3204 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003205 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003206 in++;
3207 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003208 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003209 in++;
3210 }
3211 goto get_more_space;
3212 }
3213 if (*in == '<') {
3214 nbchar = in - ctxt->input->cur;
3215 if (nbchar > 0) {
3216 const xmlChar *tmp = ctxt->input->cur;
3217 ctxt->input->cur = in;
3218
Daniel Veillard34099b42004-11-04 17:34:35 +00003219 if ((ctxt->sax != NULL) &&
3220 (ctxt->sax->ignorableWhitespace !=
3221 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003222 if (areBlanks(ctxt, tmp, nbchar, 1)) {
3223 ctxt->sax->ignorableWhitespace(ctxt->userData,
3224 tmp, nbchar);
3225 } else if (ctxt->sax->characters != NULL)
3226 ctxt->sax->characters(ctxt->userData,
3227 tmp, nbchar);
Daniel Veillard34099b42004-11-04 17:34:35 +00003228 } else if ((ctxt->sax != NULL) &&
3229 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003230 ctxt->sax->characters(ctxt->userData,
3231 tmp, nbchar);
3232 }
3233 }
3234 return;
3235 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003236
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003237get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003238 ccol = ctxt->input->col;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003239 while (((*in > ']') && (*in <= 0x7F)) ||
3240 ((*in > '&') && (*in < '<')) ||
3241 ((*in > '<') && (*in < ']')) ||
3242 ((*in >= 0x20) && (*in < '&')) ||
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003243 (*in == 0x09)) {
3244 in++;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003245 ccol++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003246 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003247 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003248 if (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003249 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003250 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003251 while (*in == 0xA) {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003252 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00003253 in++;
3254 }
3255 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003256 }
3257 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003258 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003259 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003260 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003261 return;
3262 }
3263 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003264 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003265 goto get_more;
3266 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003267 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00003268 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00003269 if ((ctxt->sax != NULL) &&
3270 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00003271 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00003272 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00003273 const xmlChar *tmp = ctxt->input->cur;
3274 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00003275
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003276 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard40412cd2003-09-03 13:28:32 +00003277 ctxt->sax->ignorableWhitespace(ctxt->userData,
3278 tmp, nbchar);
3279 } else if (ctxt->sax->characters != NULL)
3280 ctxt->sax->characters(ctxt->userData,
3281 tmp, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003282 line = ctxt->input->line;
3283 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00003284 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00003285 if (ctxt->sax->characters != NULL)
3286 ctxt->sax->characters(ctxt->userData,
3287 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00003288 line = ctxt->input->line;
3289 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00003290 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00003291 }
3292 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003293 if (*in == 0xD) {
3294 in++;
3295 if (*in == 0xA) {
3296 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003297 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003298 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00003299 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003300 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00003301 in--;
3302 }
3303 if (*in == '<') {
3304 return;
3305 }
3306 if (*in == '&') {
3307 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003308 }
3309 SHRINK;
3310 GROW;
3311 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00003312 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00003313 nbchar = 0;
3314 }
Daniel Veillard50582112001-03-26 22:52:16 +00003315 ctxt->input->line = line;
3316 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003317 xmlParseCharDataComplex(ctxt, cdata);
3318}
3319
Daniel Veillard01c13b52002-12-10 15:19:08 +00003320/**
3321 * xmlParseCharDataComplex:
3322 * @ctxt: an XML parser context
3323 * @cdata: int indicating whether we are within a CDATA section
3324 *
3325 * parse a CharData section.this is the fallback function
3326 * of xmlParseCharData() when the parsing requires handling
3327 * of non-ASCII characters.
3328 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00003329void
3330xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00003331 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
3332 int nbchar = 0;
3333 int cur, l;
3334 int count = 0;
3335
3336 SHRINK;
3337 GROW;
3338 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003339 while ((cur != '<') && /* checked */
3340 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00003341 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00003342 if ((cur == ']') && (NXT(1) == ']') &&
3343 (NXT(2) == '>')) {
3344 if (cdata) break;
3345 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003346 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003347 }
3348 }
3349 COPY_BUF(l,buf,nbchar,cur);
3350 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003351 buf[nbchar] = 0;
3352
Owen Taylor3473f882001-02-23 17:55:21 +00003353 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003354 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003355 */
3356 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003357 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003358 if (ctxt->sax->ignorableWhitespace != NULL)
3359 ctxt->sax->ignorableWhitespace(ctxt->userData,
3360 buf, nbchar);
3361 } else {
3362 if (ctxt->sax->characters != NULL)
3363 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3364 }
3365 }
3366 nbchar = 0;
3367 }
3368 count++;
3369 if (count > 50) {
3370 GROW;
3371 count = 0;
3372 }
3373 NEXTL(l);
3374 cur = CUR_CHAR(l);
3375 }
3376 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00003377 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003378 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003379 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00003380 */
3381 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003382 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003383 if (ctxt->sax->ignorableWhitespace != NULL)
3384 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
3385 } else {
3386 if (ctxt->sax->characters != NULL)
3387 ctxt->sax->characters(ctxt->userData, buf, nbchar);
3388 }
3389 }
3390 }
3391}
3392
3393/**
3394 * xmlParseExternalID:
3395 * @ctxt: an XML parser context
3396 * @publicID: a xmlChar** receiving PubidLiteral
3397 * @strict: indicate whether we should restrict parsing to only
3398 * production [75], see NOTE below
3399 *
3400 * Parse an External ID or a Public ID
3401 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003402 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00003403 * 'PUBLIC' S PubidLiteral S SystemLiteral
3404 *
3405 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
3406 * | 'PUBLIC' S PubidLiteral S SystemLiteral
3407 *
3408 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
3409 *
3410 * Returns the function returns SystemLiteral and in the second
3411 * case publicID receives PubidLiteral, is strict is off
3412 * it is possible to return NULL and have publicID set.
3413 */
3414
3415xmlChar *
3416xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
3417 xmlChar *URI = NULL;
3418
3419 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00003420
3421 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00003422 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003423 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003424 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003425 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3426 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003427 }
3428 SKIP_BLANKS;
3429 URI = xmlParseSystemLiteral(ctxt);
3430 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003431 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003432 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00003433 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003434 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00003435 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003437 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003438 }
3439 SKIP_BLANKS;
3440 *publicID = xmlParsePubidLiteral(ctxt);
3441 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003442 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003443 }
3444 if (strict) {
3445 /*
3446 * We don't handle [83] so "S SystemLiteral" is required.
3447 */
William M. Brack76e95df2003-10-18 16:20:14 +00003448 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00003450 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003451 }
3452 } else {
3453 /*
3454 * We handle [83] so we return immediately, if
3455 * "S SystemLiteral" is not detected. From a purely parsing
3456 * point of view that's a nice mess.
3457 */
3458 const xmlChar *ptr;
3459 GROW;
3460
3461 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00003462 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003463
William M. Brack76e95df2003-10-18 16:20:14 +00003464 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00003465 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
3466 }
3467 SKIP_BLANKS;
3468 URI = xmlParseSystemLiteral(ctxt);
3469 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003470 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003471 }
3472 }
3473 return(URI);
3474}
3475
3476/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00003477 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00003478 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00003479 * @buf: the already parsed part of the buffer
3480 * @len: number of bytes filles in the buffer
3481 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00003482 *
3483 * Skip an XML (SGML) comment <!-- .... -->
3484 * The spec says that "For compatibility, the string "--" (double-hyphen)
3485 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00003486 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00003487 *
3488 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3489 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00003490static void
3491xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00003492 int q, ql;
3493 int r, rl;
3494 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00003495 xmlParserInputPtr input = ctxt->input;
3496 int count = 0;
3497
Owen Taylor3473f882001-02-23 17:55:21 +00003498 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00003499 len = 0;
3500 size = XML_PARSER_BUFFER_SIZE;
3501 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3502 if (buf == NULL) {
3503 xmlErrMemory(ctxt, NULL);
3504 return;
3505 }
Owen Taylor3473f882001-02-23 17:55:21 +00003506 }
3507 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003508 if (q == 0)
3509 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003510 NEXTL(ql);
3511 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003512 if (r == 0)
3513 goto not_terminated;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 NEXTL(rl);
3515 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003516 if (cur == 0)
3517 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00003518 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003519 ((cur != '>') ||
3520 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00003521 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003522 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003523 }
3524 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00003525 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003526 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00003527 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3528 if (new_buf == NULL) {
3529 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003530 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003531 return;
3532 }
William M. Bracka3215c72004-07-31 16:24:01 +00003533 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00003534 }
3535 COPY_BUF(ql,buf,len,q);
3536 q = r;
3537 ql = rl;
3538 r = cur;
3539 rl = l;
3540
3541 count++;
3542 if (count > 50) {
3543 GROW;
3544 count = 0;
3545 }
3546 NEXTL(l);
3547 cur = CUR_CHAR(l);
3548 if (cur == 0) {
3549 SHRINK;
3550 GROW;
3551 cur = CUR_CHAR(l);
3552 }
3553 }
3554 buf[len] = 0;
William M. Brack871611b2003-10-18 04:53:14 +00003555 if (!IS_CHAR(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003556 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00003557 "Comment not terminated \n<!--%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003558 xmlFree(buf);
3559 } else {
3560 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003561 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3562 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003563 }
3564 NEXT;
3565 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3566 (!ctxt->disableSAX))
3567 ctxt->sax->comment(ctxt->userData, buf);
3568 xmlFree(buf);
3569 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003570 return;
3571not_terminated:
3572 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3573 "Comment not terminated\n", NULL);
3574 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003575}
Daniel Veillard4c778d82005-01-23 17:37:44 +00003576/**
3577 * xmlParseComment:
3578 * @ctxt: an XML parser context
3579 *
3580 * Skip an XML (SGML) comment <!-- .... -->
3581 * The spec says that "For compatibility, the string "--" (double-hyphen)
3582 * must not occur within comments. "
3583 *
3584 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
3585 */
3586void
3587xmlParseComment(xmlParserCtxtPtr ctxt) {
3588 xmlChar *buf = NULL;
3589 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00003590 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00003591 xmlParserInputState state;
3592 const xmlChar *in;
3593 int nbchar = 0, ccol;
3594
3595 /*
3596 * Check that there is a comment right here.
3597 */
3598 if ((RAW != '<') || (NXT(1) != '!') ||
3599 (NXT(2) != '-') || (NXT(3) != '-')) return;
3600
3601 state = ctxt->instate;
3602 ctxt->instate = XML_PARSER_COMMENT;
3603 SKIP(4);
3604 SHRINK;
3605 GROW;
3606
3607 /*
3608 * Accelerated common case where input don't need to be
3609 * modified before passing it to the handler.
3610 */
3611 in = ctxt->input->cur;
3612 do {
3613 if (*in == 0xA) {
3614 ctxt->input->line++; ctxt->input->col = 1;
3615 in++;
3616 while (*in == 0xA) {
3617 ctxt->input->line++; ctxt->input->col = 1;
3618 in++;
3619 }
3620 }
3621get_more:
3622 ccol = ctxt->input->col;
3623 while (((*in > '-') && (*in <= 0x7F)) ||
3624 ((*in >= 0x20) && (*in < '-')) ||
3625 (*in == 0x09)) {
3626 in++;
3627 ccol++;
3628 }
3629 ctxt->input->col = ccol;
3630 if (*in == 0xA) {
3631 ctxt->input->line++; ctxt->input->col = 1;
3632 in++;
3633 while (*in == 0xA) {
3634 ctxt->input->line++; ctxt->input->col = 1;
3635 in++;
3636 }
3637 goto get_more;
3638 }
3639 nbchar = in - ctxt->input->cur;
3640 /*
3641 * save current set of data
3642 */
3643 if (nbchar > 0) {
3644 if ((ctxt->sax != NULL) &&
3645 (ctxt->sax->comment != NULL)) {
3646 if (buf == NULL) {
3647 if ((*in == '-') && (in[1] == '-'))
3648 size = nbchar + 1;
3649 else
3650 size = XML_PARSER_BUFFER_SIZE + nbchar;
3651 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3652 if (buf == NULL) {
3653 xmlErrMemory(ctxt, NULL);
3654 ctxt->instate = state;
3655 return;
3656 }
3657 len = 0;
3658 } else if (len + nbchar + 1 >= size) {
3659 xmlChar *new_buf;
3660 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
3661 new_buf = (xmlChar *) xmlRealloc(buf,
3662 size * sizeof(xmlChar));
3663 if (new_buf == NULL) {
3664 xmlFree (buf);
3665 xmlErrMemory(ctxt, NULL);
3666 ctxt->instate = state;
3667 return;
3668 }
3669 buf = new_buf;
3670 }
3671 memcpy(&buf[len], ctxt->input->cur, nbchar);
3672 len += nbchar;
3673 buf[len] = 0;
3674 }
3675 }
3676 ctxt->input->cur = in;
3677 if (*in == 0xA)
3678
3679 if (*in == 0xD) {
3680 in++;
3681 if (*in == 0xA) {
3682 ctxt->input->cur = in;
3683 in++;
3684 ctxt->input->line++; ctxt->input->col = 1;
3685 continue; /* while */
3686 }
3687 in--;
3688 }
3689 SHRINK;
3690 GROW;
3691 in = ctxt->input->cur;
3692 if (*in == '-') {
3693 if (in[1] == '-') {
3694 if (in[2] == '>') {
3695 SKIP(3);
3696 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
3697 (!ctxt->disableSAX)) {
3698 if (buf != NULL)
3699 ctxt->sax->comment(ctxt->userData, buf);
3700 else
3701 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
3702 }
3703 if (buf != NULL)
3704 xmlFree(buf);
3705 ctxt->instate = state;
3706 return;
3707 }
3708 if (buf != NULL)
3709 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3710 "Comment not terminated \n<!--%.50s\n",
3711 buf);
3712 else
3713 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
3714 "Comment not terminated \n", NULL);
3715 in++;
3716 ctxt->input->col++;
3717 }
3718 in++;
3719 ctxt->input->col++;
3720 goto get_more;
3721 }
3722 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
3723 xmlParseCommentComplex(ctxt, buf, len, size);
3724 ctxt->instate = state;
3725 return;
3726}
3727
Owen Taylor3473f882001-02-23 17:55:21 +00003728
3729/**
3730 * xmlParsePITarget:
3731 * @ctxt: an XML parser context
3732 *
3733 * parse the name of a PI
3734 *
3735 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
3736 *
3737 * Returns the PITarget name or NULL
3738 */
3739
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003740const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003741xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003742 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003743
3744 name = xmlParseName(ctxt);
3745 if ((name != NULL) &&
3746 ((name[0] == 'x') || (name[0] == 'X')) &&
3747 ((name[1] == 'm') || (name[1] == 'M')) &&
3748 ((name[2] == 'l') || (name[2] == 'L'))) {
3749 int i;
3750 if ((name[0] == 'x') && (name[1] == 'm') &&
3751 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003752 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00003753 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003754 return(name);
3755 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003756 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 return(name);
3758 }
3759 for (i = 0;;i++) {
3760 if (xmlW3CPIs[i] == NULL) break;
3761 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
3762 return(name);
3763 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00003764 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
3765 "xmlParsePITarget: invalid name prefix 'xml'\n",
3766 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003767 }
3768 return(name);
3769}
3770
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003771#ifdef LIBXML_CATALOG_ENABLED
3772/**
3773 * xmlParseCatalogPI:
3774 * @ctxt: an XML parser context
3775 * @catalog: the PI value string
3776 *
3777 * parse an XML Catalog Processing Instruction.
3778 *
3779 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3780 *
3781 * Occurs only if allowed by the user and if happening in the Misc
3782 * part of the document before any doctype informations
3783 * This will add the given catalog to the parsing context in order
3784 * to be used if there is a resolution need further down in the document
3785 */
3786
3787static void
3788xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3789 xmlChar *URL = NULL;
3790 const xmlChar *tmp, *base;
3791 xmlChar marker;
3792
3793 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00003794 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003795 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3796 goto error;
3797 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00003798 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003799 if (*tmp != '=') {
3800 return;
3801 }
3802 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003803 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003804 marker = *tmp;
3805 if ((marker != '\'') && (marker != '"'))
3806 goto error;
3807 tmp++;
3808 base = tmp;
3809 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3810 if (*tmp == 0)
3811 goto error;
3812 URL = xmlStrndup(base, tmp - base);
3813 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00003814 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003815 if (*tmp != 0)
3816 goto error;
3817
3818 if (URL != NULL) {
3819 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3820 xmlFree(URL);
3821 }
3822 return;
3823
3824error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00003825 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
3826 "Catalog PI syntax error: %s\n",
3827 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003828 if (URL != NULL)
3829 xmlFree(URL);
3830}
3831#endif
3832
Owen Taylor3473f882001-02-23 17:55:21 +00003833/**
3834 * xmlParsePI:
3835 * @ctxt: an XML parser context
3836 *
3837 * parse an XML Processing Instruction.
3838 *
3839 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3840 *
3841 * The processing is transfered to SAX once parsed.
3842 */
3843
3844void
3845xmlParsePI(xmlParserCtxtPtr ctxt) {
3846 xmlChar *buf = NULL;
3847 int len = 0;
3848 int size = XML_PARSER_BUFFER_SIZE;
3849 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003850 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00003851 xmlParserInputState state;
3852 int count = 0;
3853
3854 if ((RAW == '<') && (NXT(1) == '?')) {
3855 xmlParserInputPtr input = ctxt->input;
3856 state = ctxt->instate;
3857 ctxt->instate = XML_PARSER_PI;
3858 /*
3859 * this is a Processing Instruction.
3860 */
3861 SKIP(2);
3862 SHRINK;
3863
3864 /*
3865 * Parse the target name and check for special support like
3866 * namespace.
3867 */
3868 target = xmlParsePITarget(ctxt);
3869 if (target != NULL) {
3870 if ((RAW == '?') && (NXT(1) == '>')) {
3871 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003872 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
3873 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003874 }
3875 SKIP(2);
3876
3877 /*
3878 * SAX: PI detected.
3879 */
3880 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3881 (ctxt->sax->processingInstruction != NULL))
3882 ctxt->sax->processingInstruction(ctxt->userData,
3883 target, NULL);
3884 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00003885 return;
3886 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003887 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003888 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003889 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003890 ctxt->instate = state;
3891 return;
3892 }
3893 cur = CUR;
3894 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003895 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
3896 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898 SKIP_BLANKS;
3899 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003900 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003901 ((cur != '?') || (NXT(1) != '>'))) {
3902 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003903 xmlChar *tmp;
3904
Owen Taylor3473f882001-02-23 17:55:21 +00003905 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003906 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3907 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003908 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003909 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003910 ctxt->instate = state;
3911 return;
3912 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003913 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003914 }
3915 count++;
3916 if (count > 50) {
3917 GROW;
3918 count = 0;
3919 }
3920 COPY_BUF(l,buf,len,cur);
3921 NEXTL(l);
3922 cur = CUR_CHAR(l);
3923 if (cur == 0) {
3924 SHRINK;
3925 GROW;
3926 cur = CUR_CHAR(l);
3927 }
3928 }
3929 buf[len] = 0;
3930 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003931 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
3932 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00003933 } else {
3934 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3936 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003937 }
3938 SKIP(2);
3939
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003940#ifdef LIBXML_CATALOG_ENABLED
3941 if (((state == XML_PARSER_MISC) ||
3942 (state == XML_PARSER_START)) &&
3943 (xmlStrEqual(target, XML_CATALOG_PI))) {
3944 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3945 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3946 (allow == XML_CATA_ALLOW_ALL))
3947 xmlParseCatalogPI(ctxt, buf);
3948 }
3949#endif
3950
3951
Owen Taylor3473f882001-02-23 17:55:21 +00003952 /*
3953 * SAX: PI detected.
3954 */
3955 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3956 (ctxt->sax->processingInstruction != NULL))
3957 ctxt->sax->processingInstruction(ctxt->userData,
3958 target, buf);
3959 }
3960 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003961 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003962 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003963 }
3964 ctxt->instate = state;
3965 }
3966}
3967
3968/**
3969 * xmlParseNotationDecl:
3970 * @ctxt: an XML parser context
3971 *
3972 * parse a notation declaration
3973 *
3974 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3975 *
3976 * Hence there is actually 3 choices:
3977 * 'PUBLIC' S PubidLiteral
3978 * 'PUBLIC' S PubidLiteral S SystemLiteral
3979 * and 'SYSTEM' S SystemLiteral
3980 *
3981 * See the NOTE on xmlParseExternalID().
3982 */
3983
3984void
3985xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003986 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00003987 xmlChar *Pubid;
3988 xmlChar *Systemid;
3989
Daniel Veillarda07050d2003-10-19 14:46:32 +00003990 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003991 xmlParserInputPtr input = ctxt->input;
3992 SHRINK;
3993 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00003994 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003995 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
3996 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00003997 return;
3998 }
3999 SKIP_BLANKS;
4000
Daniel Veillard76d66f42001-05-16 21:05:17 +00004001 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004002 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004003 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004004 return;
4005 }
William M. Brack76e95df2003-10-18 16:20:14 +00004006 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004008 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004009 return;
4010 }
4011 SKIP_BLANKS;
4012
4013 /*
4014 * Parse the IDs.
4015 */
4016 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4017 SKIP_BLANKS;
4018
4019 if (RAW == '>') {
4020 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004021 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4022 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004023 }
4024 NEXT;
4025 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4026 (ctxt->sax->notationDecl != NULL))
4027 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4028 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004029 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004030 }
Owen Taylor3473f882001-02-23 17:55:21 +00004031 if (Systemid != NULL) xmlFree(Systemid);
4032 if (Pubid != NULL) xmlFree(Pubid);
4033 }
4034}
4035
4036/**
4037 * xmlParseEntityDecl:
4038 * @ctxt: an XML parser context
4039 *
4040 * parse <!ENTITY declarations
4041 *
4042 * [70] EntityDecl ::= GEDecl | PEDecl
4043 *
4044 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4045 *
4046 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4047 *
4048 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4049 *
4050 * [74] PEDef ::= EntityValue | ExternalID
4051 *
4052 * [76] NDataDecl ::= S 'NDATA' S Name
4053 *
4054 * [ VC: Notation Declared ]
4055 * The Name must match the declared name of a notation.
4056 */
4057
4058void
4059xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004060 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004061 xmlChar *value = NULL;
4062 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004063 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004064 int isParameter = 0;
4065 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004066 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004067
Daniel Veillard4c778d82005-01-23 17:37:44 +00004068 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004069 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004070 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 SHRINK;
4072 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004073 skipped = SKIP_BLANKS;
4074 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004075 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4076 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004077 }
Owen Taylor3473f882001-02-23 17:55:21 +00004078
4079 if (RAW == '%') {
4080 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004081 skipped = SKIP_BLANKS;
4082 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004083 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4084 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004085 }
Owen Taylor3473f882001-02-23 17:55:21 +00004086 isParameter = 1;
4087 }
4088
Daniel Veillard76d66f42001-05-16 21:05:17 +00004089 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004090 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004091 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4092 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004093 return;
4094 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004095 skipped = SKIP_BLANKS;
4096 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004097 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4098 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004099 }
Owen Taylor3473f882001-02-23 17:55:21 +00004100
Daniel Veillardf5582f12002-06-11 10:08:16 +00004101 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004102 /*
4103 * handle the various case of definitions...
4104 */
4105 if (isParameter) {
4106 if ((RAW == '"') || (RAW == '\'')) {
4107 value = xmlParseEntityValue(ctxt, &orig);
4108 if (value) {
4109 if ((ctxt->sax != NULL) &&
4110 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4111 ctxt->sax->entityDecl(ctxt->userData, name,
4112 XML_INTERNAL_PARAMETER_ENTITY,
4113 NULL, NULL, value);
4114 }
4115 } else {
4116 URI = xmlParseExternalID(ctxt, &literal, 1);
4117 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004118 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004119 }
4120 if (URI) {
4121 xmlURIPtr uri;
4122
4123 uri = xmlParseURI((const char *) URI);
4124 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004125 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4126 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004127 /*
4128 * This really ought to be a well formedness error
4129 * but the XML Core WG decided otherwise c.f. issue
4130 * E26 of the XML erratas.
4131 */
Owen Taylor3473f882001-02-23 17:55:21 +00004132 } else {
4133 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004134 /*
4135 * Okay this is foolish to block those but not
4136 * invalid URIs.
4137 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004138 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 } else {
4140 if ((ctxt->sax != NULL) &&
4141 (!ctxt->disableSAX) &&
4142 (ctxt->sax->entityDecl != NULL))
4143 ctxt->sax->entityDecl(ctxt->userData, name,
4144 XML_EXTERNAL_PARAMETER_ENTITY,
4145 literal, URI, NULL);
4146 }
4147 xmlFreeURI(uri);
4148 }
4149 }
4150 }
4151 } else {
4152 if ((RAW == '"') || (RAW == '\'')) {
4153 value = xmlParseEntityValue(ctxt, &orig);
4154 if ((ctxt->sax != NULL) &&
4155 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4156 ctxt->sax->entityDecl(ctxt->userData, name,
4157 XML_INTERNAL_GENERAL_ENTITY,
4158 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004159 /*
4160 * For expat compatibility in SAX mode.
4161 */
4162 if ((ctxt->myDoc == NULL) ||
4163 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4164 if (ctxt->myDoc == NULL) {
4165 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4166 }
4167 if (ctxt->myDoc->intSubset == NULL)
4168 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4169 BAD_CAST "fake", NULL, NULL);
4170
Daniel Veillard1af9a412003-08-20 22:54:39 +00004171 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
4172 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004173 }
Owen Taylor3473f882001-02-23 17:55:21 +00004174 } else {
4175 URI = xmlParseExternalID(ctxt, &literal, 1);
4176 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004177 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004178 }
4179 if (URI) {
4180 xmlURIPtr uri;
4181
4182 uri = xmlParseURI((const char *)URI);
4183 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004184 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4185 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004186 /*
4187 * This really ought to be a well formedness error
4188 * but the XML Core WG decided otherwise c.f. issue
4189 * E26 of the XML erratas.
4190 */
Owen Taylor3473f882001-02-23 17:55:21 +00004191 } else {
4192 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004193 /*
4194 * Okay this is foolish to block those but not
4195 * invalid URIs.
4196 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004197 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004198 }
4199 xmlFreeURI(uri);
4200 }
4201 }
William M. Brack76e95df2003-10-18 16:20:14 +00004202 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004203 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4204 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004205 }
4206 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004207 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004208 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00004209 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004210 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4211 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004212 }
4213 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004214 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004215 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4216 (ctxt->sax->unparsedEntityDecl != NULL))
4217 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
4218 literal, URI, ndata);
4219 } else {
4220 if ((ctxt->sax != NULL) &&
4221 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4222 ctxt->sax->entityDecl(ctxt->userData, name,
4223 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4224 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004225 /*
4226 * For expat compatibility in SAX mode.
4227 * assuming the entity repalcement was asked for
4228 */
4229 if ((ctxt->replaceEntities != 0) &&
4230 ((ctxt->myDoc == NULL) ||
4231 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
4232 if (ctxt->myDoc == NULL) {
4233 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
4234 }
4235
4236 if (ctxt->myDoc->intSubset == NULL)
4237 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
4238 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00004239 xmlSAX2EntityDecl(ctxt, name,
4240 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
4241 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004242 }
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 }
4245 }
4246 SKIP_BLANKS;
4247 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004248 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004249 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004250 } else {
4251 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004252 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4253 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004254 }
4255 NEXT;
4256 }
4257 if (orig != NULL) {
4258 /*
4259 * Ugly mechanism to save the raw entity value.
4260 */
4261 xmlEntityPtr cur = NULL;
4262
4263 if (isParameter) {
4264 if ((ctxt->sax != NULL) &&
4265 (ctxt->sax->getParameterEntity != NULL))
4266 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
4267 } else {
4268 if ((ctxt->sax != NULL) &&
4269 (ctxt->sax->getEntity != NULL))
4270 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004271 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00004272 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004273 }
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 if (cur != NULL) {
4276 if (cur->orig != NULL)
4277 xmlFree(orig);
4278 else
4279 cur->orig = orig;
4280 } else
4281 xmlFree(orig);
4282 }
Owen Taylor3473f882001-02-23 17:55:21 +00004283 if (value != NULL) xmlFree(value);
4284 if (URI != NULL) xmlFree(URI);
4285 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00004286 }
4287}
4288
4289/**
4290 * xmlParseDefaultDecl:
4291 * @ctxt: an XML parser context
4292 * @value: Receive a possible fixed default value for the attribute
4293 *
4294 * Parse an attribute default declaration
4295 *
4296 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
4297 *
4298 * [ VC: Required Attribute ]
4299 * if the default declaration is the keyword #REQUIRED, then the
4300 * attribute must be specified for all elements of the type in the
4301 * attribute-list declaration.
4302 *
4303 * [ VC: Attribute Default Legal ]
4304 * The declared default value must meet the lexical constraints of
4305 * the declared attribute type c.f. xmlValidateAttributeDecl()
4306 *
4307 * [ VC: Fixed Attribute Default ]
4308 * if an attribute has a default value declared with the #FIXED
4309 * keyword, instances of that attribute must match the default value.
4310 *
4311 * [ WFC: No < in Attribute Values ]
4312 * handled in xmlParseAttValue()
4313 *
4314 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
4315 * or XML_ATTRIBUTE_FIXED.
4316 */
4317
4318int
4319xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
4320 int val;
4321 xmlChar *ret;
4322
4323 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004324 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004325 SKIP(9);
4326 return(XML_ATTRIBUTE_REQUIRED);
4327 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004328 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004329 SKIP(8);
4330 return(XML_ATTRIBUTE_IMPLIED);
4331 }
4332 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004333 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004334 SKIP(6);
4335 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00004336 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004337 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4338 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004339 }
4340 SKIP_BLANKS;
4341 }
4342 ret = xmlParseAttValue(ctxt);
4343 ctxt->instate = XML_PARSER_DTD;
4344 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00004345 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004346 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004347 } else
4348 *value = ret;
4349 return(val);
4350}
4351
4352/**
4353 * xmlParseNotationType:
4354 * @ctxt: an XML parser context
4355 *
4356 * parse an Notation attribute type.
4357 *
4358 * Note: the leading 'NOTATION' S part has already being parsed...
4359 *
4360 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4361 *
4362 * [ VC: Notation Attributes ]
4363 * Values of this type must match one of the notation names included
4364 * in the declaration; all notation names in the declaration must be declared.
4365 *
4366 * Returns: the notation attribute tree built while parsing
4367 */
4368
4369xmlEnumerationPtr
4370xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004371 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004372 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4373
4374 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004375 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004376 return(NULL);
4377 }
4378 SHRINK;
4379 do {
4380 NEXT;
4381 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004382 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004383 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004384 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4385 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004386 return(ret);
4387 }
4388 cur = xmlCreateEnumeration(name);
Owen Taylor3473f882001-02-23 17:55:21 +00004389 if (cur == NULL) return(ret);
4390 if (last == NULL) ret = last = cur;
4391 else {
4392 last->next = cur;
4393 last = cur;
4394 }
4395 SKIP_BLANKS;
4396 } while (RAW == '|');
4397 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004398 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004399 if ((last != NULL) && (last != ret))
4400 xmlFreeEnumeration(last);
4401 return(ret);
4402 }
4403 NEXT;
4404 return(ret);
4405}
4406
4407/**
4408 * xmlParseEnumerationType:
4409 * @ctxt: an XML parser context
4410 *
4411 * parse an Enumeration attribute type.
4412 *
4413 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
4414 *
4415 * [ VC: Enumeration ]
4416 * Values of this type must match one of the Nmtoken tokens in
4417 * the declaration
4418 *
4419 * Returns: the enumeration attribute tree built while parsing
4420 */
4421
4422xmlEnumerationPtr
4423xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
4424 xmlChar *name;
4425 xmlEnumerationPtr ret = NULL, last = NULL, cur;
4426
4427 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004428 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004429 return(NULL);
4430 }
4431 SHRINK;
4432 do {
4433 NEXT;
4434 SKIP_BLANKS;
4435 name = xmlParseNmtoken(ctxt);
4436 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004437 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004438 return(ret);
4439 }
4440 cur = xmlCreateEnumeration(name);
4441 xmlFree(name);
4442 if (cur == NULL) return(ret);
4443 if (last == NULL) ret = last = cur;
4444 else {
4445 last->next = cur;
4446 last = cur;
4447 }
4448 SKIP_BLANKS;
4449 } while (RAW == '|');
4450 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004451 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004452 return(ret);
4453 }
4454 NEXT;
4455 return(ret);
4456}
4457
4458/**
4459 * xmlParseEnumeratedType:
4460 * @ctxt: an XML parser context
4461 * @tree: the enumeration tree built while parsing
4462 *
4463 * parse an Enumerated attribute type.
4464 *
4465 * [57] EnumeratedType ::= NotationType | Enumeration
4466 *
4467 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
4468 *
4469 *
4470 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
4471 */
4472
4473int
4474xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00004475 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004476 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00004477 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004478 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4479 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004480 return(0);
4481 }
4482 SKIP_BLANKS;
4483 *tree = xmlParseNotationType(ctxt);
4484 if (*tree == NULL) return(0);
4485 return(XML_ATTRIBUTE_NOTATION);
4486 }
4487 *tree = xmlParseEnumerationType(ctxt);
4488 if (*tree == NULL) return(0);
4489 return(XML_ATTRIBUTE_ENUMERATION);
4490}
4491
4492/**
4493 * xmlParseAttributeType:
4494 * @ctxt: an XML parser context
4495 * @tree: the enumeration tree built while parsing
4496 *
4497 * parse the Attribute list def for an element
4498 *
4499 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
4500 *
4501 * [55] StringType ::= 'CDATA'
4502 *
4503 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
4504 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
4505 *
4506 * Validity constraints for attribute values syntax are checked in
4507 * xmlValidateAttributeValue()
4508 *
4509 * [ VC: ID ]
4510 * Values of type ID must match the Name production. A name must not
4511 * appear more than once in an XML document as a value of this type;
4512 * i.e., ID values must uniquely identify the elements which bear them.
4513 *
4514 * [ VC: One ID per Element Type ]
4515 * No element type may have more than one ID attribute specified.
4516 *
4517 * [ VC: ID Attribute Default ]
4518 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
4519 *
4520 * [ VC: IDREF ]
4521 * Values of type IDREF must match the Name production, and values
4522 * of type IDREFS must match Names; each IDREF Name must match the value
4523 * of an ID attribute on some element in the XML document; i.e. IDREF
4524 * values must match the value of some ID attribute.
4525 *
4526 * [ VC: Entity Name ]
4527 * Values of type ENTITY must match the Name production, values
4528 * of type ENTITIES must match Names; each Entity Name must match the
4529 * name of an unparsed entity declared in the DTD.
4530 *
4531 * [ VC: Name Token ]
4532 * Values of type NMTOKEN must match the Nmtoken production; values
4533 * of type NMTOKENS must match Nmtokens.
4534 *
4535 * Returns the attribute type
4536 */
4537int
4538xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
4539 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004540 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004541 SKIP(5);
4542 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004543 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004544 SKIP(6);
4545 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004546 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004547 SKIP(5);
4548 return(XML_ATTRIBUTE_IDREF);
4549 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
4550 SKIP(2);
4551 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004552 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004553 SKIP(6);
4554 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004555 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004556 SKIP(8);
4557 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004558 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004559 SKIP(8);
4560 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00004561 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004562 SKIP(7);
4563 return(XML_ATTRIBUTE_NMTOKEN);
4564 }
4565 return(xmlParseEnumeratedType(ctxt, tree));
4566}
4567
4568/**
4569 * xmlParseAttributeListDecl:
4570 * @ctxt: an XML parser context
4571 *
4572 * : parse the Attribute list def for an element
4573 *
4574 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
4575 *
4576 * [53] AttDef ::= S Name S AttType S DefaultDecl
4577 *
4578 */
4579void
4580xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004581 const xmlChar *elemName;
4582 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00004583 xmlEnumerationPtr tree;
4584
Daniel Veillarda07050d2003-10-19 14:46:32 +00004585 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004586 xmlParserInputPtr input = ctxt->input;
4587
4588 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00004589 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004590 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004591 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004592 }
4593 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004594 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004595 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004596 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4597 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004598 return;
4599 }
4600 SKIP_BLANKS;
4601 GROW;
4602 while (RAW != '>') {
4603 const xmlChar *check = CUR_PTR;
4604 int type;
4605 int def;
4606 xmlChar *defaultValue = NULL;
4607
4608 GROW;
4609 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004610 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004611 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004612 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4613 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004614 break;
4615 }
4616 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004617 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004619 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004620 if (defaultValue != NULL)
4621 xmlFree(defaultValue);
4622 break;
4623 }
4624 SKIP_BLANKS;
4625
4626 type = xmlParseAttributeType(ctxt, &tree);
4627 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004628 if (defaultValue != NULL)
4629 xmlFree(defaultValue);
4630 break;
4631 }
4632
4633 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00004634 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4636 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004637 if (defaultValue != NULL)
4638 xmlFree(defaultValue);
4639 if (tree != NULL)
4640 xmlFreeEnumeration(tree);
4641 break;
4642 }
4643 SKIP_BLANKS;
4644
4645 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4646 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00004647 if (defaultValue != NULL)
4648 xmlFree(defaultValue);
4649 if (tree != NULL)
4650 xmlFreeEnumeration(tree);
4651 break;
4652 }
4653
4654 GROW;
4655 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00004656 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004657 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004658 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004659 if (defaultValue != NULL)
4660 xmlFree(defaultValue);
4661 if (tree != NULL)
4662 xmlFreeEnumeration(tree);
4663 break;
4664 }
4665 SKIP_BLANKS;
4666 }
4667 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004668 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
4669 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004670 if (defaultValue != NULL)
4671 xmlFree(defaultValue);
4672 if (tree != NULL)
4673 xmlFreeEnumeration(tree);
4674 break;
4675 }
4676 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4677 (ctxt->sax->attributeDecl != NULL))
4678 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4679 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00004680 else if (tree != NULL)
4681 xmlFreeEnumeration(tree);
4682
4683 if ((ctxt->sax2) && (defaultValue != NULL) &&
4684 (def != XML_ATTRIBUTE_IMPLIED) &&
4685 (def != XML_ATTRIBUTE_REQUIRED)) {
4686 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
4687 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004688 if ((ctxt->sax2) && (type != XML_ATTRIBUTE_CDATA)) {
4689 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
4690 }
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if (defaultValue != NULL)
4692 xmlFree(defaultValue);
4693 GROW;
4694 }
4695 if (RAW == '>') {
4696 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004697 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4698 "Attribute list declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004699 }
4700 NEXT;
4701 }
Owen Taylor3473f882001-02-23 17:55:21 +00004702 }
4703}
4704
4705/**
4706 * xmlParseElementMixedContentDecl:
4707 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004708 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004709 *
4710 * parse the declaration for a Mixed Element content
4711 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4712 *
4713 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4714 * '(' S? '#PCDATA' S? ')'
4715 *
4716 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4717 *
4718 * [ VC: No Duplicate Types ]
4719 * The same name must not appear more than once in a single
4720 * mixed-content declaration.
4721 *
4722 * returns: the list of the xmlElementContentPtr describing the element choices
4723 */
4724xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004725xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004726 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004727 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004728
4729 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004730 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004731 SKIP(7);
4732 SKIP_BLANKS;
4733 SHRINK;
4734 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004735 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004736 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4737"Element content declaration doesn't start and stop in the same entity\n",
4738 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004739 }
Owen Taylor3473f882001-02-23 17:55:21 +00004740 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004741 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004742 if (RAW == '*') {
4743 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4744 NEXT;
4745 }
4746 return(ret);
4747 }
4748 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004749 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00004750 if (ret == NULL) return(NULL);
4751 }
4752 while (RAW == '|') {
4753 NEXT;
4754 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004755 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004756 if (ret == NULL) return(NULL);
4757 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004758 if (cur != NULL)
4759 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004760 cur = ret;
4761 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004762 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004763 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004764 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004765 if (n->c1 != NULL)
4766 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004767 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004768 if (n != NULL)
4769 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004770 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 }
4772 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004773 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004774 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004775 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004776 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004777 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004778 return(NULL);
4779 }
4780 SKIP_BLANKS;
4781 GROW;
4782 }
4783 if ((RAW == ')') && (NXT(1) == '*')) {
4784 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004785 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00004786 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004787 if (cur->c2 != NULL)
4788 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004789 }
4790 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004791 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00004792 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
4793"Element content declaration doesn't start and stop in the same entity\n",
4794 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004795 }
Owen Taylor3473f882001-02-23 17:55:21 +00004796 SKIP(2);
4797 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004798 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004799 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004800 return(NULL);
4801 }
4802
4803 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004804 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004805 }
4806 return(ret);
4807}
4808
4809/**
4810 * xmlParseElementChildrenContentDecl:
4811 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00004812 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00004813 *
4814 * parse the declaration for a Mixed Element content
4815 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4816 *
4817 *
4818 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4819 *
4820 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4821 *
4822 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4823 *
4824 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4825 *
4826 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4827 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004828 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004829 * opening or closing parentheses in a choice, seq, or Mixed
4830 * construct is contained in the replacement text for a parameter
4831 * entity, both must be contained in the same replacement text. For
4832 * interoperability, if a parameter-entity reference appears in a
4833 * choice, seq, or Mixed construct, its replacement text should not
4834 * be empty, and neither the first nor last non-blank character of
4835 * the replacement text should be a connector (| or ,).
4836 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004837 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004838 * hierarchy.
4839 */
4840xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004841xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004842 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004843 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00004844 xmlChar type = 0;
4845
4846 SKIP_BLANKS;
4847 GROW;
4848 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004849 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004850
Owen Taylor3473f882001-02-23 17:55:21 +00004851 /* Recurse on first child */
4852 NEXT;
4853 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004854 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004855 SKIP_BLANKS;
4856 GROW;
4857 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004858 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004859 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004860 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004861 return(NULL);
4862 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004863 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004864 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004865 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00004866 return(NULL);
4867 }
Owen Taylor3473f882001-02-23 17:55:21 +00004868 GROW;
4869 if (RAW == '?') {
4870 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4871 NEXT;
4872 } else if (RAW == '*') {
4873 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4874 NEXT;
4875 } else if (RAW == '+') {
4876 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4877 NEXT;
4878 } else {
4879 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4880 }
Owen Taylor3473f882001-02-23 17:55:21 +00004881 GROW;
4882 }
4883 SKIP_BLANKS;
4884 SHRINK;
4885 while (RAW != ')') {
4886 /*
4887 * Each loop we parse one separator and one element.
4888 */
4889 if (RAW == ',') {
4890 if (type == 0) type = CUR;
4891
4892 /*
4893 * Detect "Name | Name , Name" error
4894 */
4895 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004896 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004897 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004898 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004899 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004900 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004901 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004902 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004903 return(NULL);
4904 }
4905 NEXT;
4906
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004907 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00004908 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004909 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004910 xmlFreeDocElementContent(ctxt->myDoc, last);
4911 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004912 return(NULL);
4913 }
4914 if (last == NULL) {
4915 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004916 if (ret != NULL)
4917 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004918 ret = cur = op;
4919 } else {
4920 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004921 if (op != NULL)
4922 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004924 if (last != NULL)
4925 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004926 cur =op;
4927 last = NULL;
4928 }
4929 } else if (RAW == '|') {
4930 if (type == 0) type = CUR;
4931
4932 /*
4933 * Detect "Name , Name | Name" error
4934 */
4935 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004936 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004937 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004938 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004939 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004940 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004941 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004942 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004943 return(NULL);
4944 }
4945 NEXT;
4946
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004947 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00004948 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004949 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004950 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00004951 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004952 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004953 return(NULL);
4954 }
4955 if (last == NULL) {
4956 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004957 if (ret != NULL)
4958 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004959 ret = cur = op;
4960 } else {
4961 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004962 if (op != NULL)
4963 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004964 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004965 if (last != NULL)
4966 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004967 cur =op;
4968 last = NULL;
4969 }
4970 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004971 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004972 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004973 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004974 return(NULL);
4975 }
4976 GROW;
4977 SKIP_BLANKS;
4978 GROW;
4979 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004980 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004981 /* Recurse on second child */
4982 NEXT;
4983 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004984 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00004985 SKIP_BLANKS;
4986 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004987 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004988 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004989 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004990 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004991 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00004992 return(NULL);
4993 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00004994 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Owen Taylor3473f882001-02-23 17:55:21 +00004995 if (RAW == '?') {
4996 last->ocur = XML_ELEMENT_CONTENT_OPT;
4997 NEXT;
4998 } else if (RAW == '*') {
4999 last->ocur = XML_ELEMENT_CONTENT_MULT;
5000 NEXT;
5001 } else if (RAW == '+') {
5002 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5003 NEXT;
5004 } else {
5005 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5006 }
5007 }
5008 SKIP_BLANKS;
5009 GROW;
5010 }
5011 if ((cur != NULL) && (last != NULL)) {
5012 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005013 if (last != NULL)
5014 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005016 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005017 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5018"Element content declaration doesn't start and stop in the same entity\n",
5019 NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005020 }
Owen Taylor3473f882001-02-23 17:55:21 +00005021 NEXT;
5022 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005023 if (ret != NULL) {
5024 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5025 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5026 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5027 else
5028 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5029 }
Owen Taylor3473f882001-02-23 17:55:21 +00005030 NEXT;
5031 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005032 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005033 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005034 cur = ret;
5035 /*
5036 * Some normalization:
5037 * (a | b* | c?)* == (a | b | c)*
5038 */
5039 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5040 if ((cur->c1 != NULL) &&
5041 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5042 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5043 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5044 if ((cur->c2 != NULL) &&
5045 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5046 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5047 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5048 cur = cur->c2;
5049 }
5050 }
Owen Taylor3473f882001-02-23 17:55:21 +00005051 NEXT;
5052 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005053 if (ret != NULL) {
5054 int found = 0;
5055
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005056 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5057 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5058 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005059 else
5060 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005061 /*
5062 * Some normalization:
5063 * (a | b*)+ == (a | b)*
5064 * (a | b?)+ == (a | b)*
5065 */
5066 while (cur->type == XML_ELEMENT_CONTENT_OR) {
5067 if ((cur->c1 != NULL) &&
5068 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5069 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5070 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5071 found = 1;
5072 }
5073 if ((cur->c2 != NULL) &&
5074 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5075 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5076 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5077 found = 1;
5078 }
5079 cur = cur->c2;
5080 }
5081 if (found)
5082 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5083 }
Owen Taylor3473f882001-02-23 17:55:21 +00005084 NEXT;
5085 }
5086 return(ret);
5087}
5088
5089/**
5090 * xmlParseElementContentDecl:
5091 * @ctxt: an XML parser context
5092 * @name: the name of the element being defined.
5093 * @result: the Element Content pointer will be stored here if any
5094 *
5095 * parse the declaration for an Element content either Mixed or Children,
5096 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5097 *
5098 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5099 *
5100 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5101 */
5102
5103int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005104xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005105 xmlElementContentPtr *result) {
5106
5107 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005108 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005109 int res;
5110
5111 *result = NULL;
5112
5113 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005114 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005115 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005116 return(-1);
5117 }
5118 NEXT;
5119 GROW;
5120 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005121 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005122 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005123 res = XML_ELEMENT_TYPE_MIXED;
5124 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005125 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 res = XML_ELEMENT_TYPE_ELEMENT;
5127 }
Owen Taylor3473f882001-02-23 17:55:21 +00005128 SKIP_BLANKS;
5129 *result = tree;
5130 return(res);
5131}
5132
5133/**
5134 * xmlParseElementDecl:
5135 * @ctxt: an XML parser context
5136 *
5137 * parse an Element declaration.
5138 *
5139 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
5140 *
5141 * [ VC: Unique Element Type Declaration ]
5142 * No element type may be declared more than once
5143 *
5144 * Returns the type of the element, or -1 in case of error
5145 */
5146int
5147xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005148 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005149 int ret = -1;
5150 xmlElementContentPtr content = NULL;
5151
Daniel Veillard4c778d82005-01-23 17:37:44 +00005152 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005153 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005154 xmlParserInputPtr input = ctxt->input;
5155
5156 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005157 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5159 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005160 }
5161 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005162 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005163 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005164 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5165 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005166 return(-1);
5167 }
5168 while ((RAW == 0) && (ctxt->inputNr > 1))
5169 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005170 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005171 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5172 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005173 }
5174 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005175 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005176 SKIP(5);
5177 /*
5178 * Element must always be empty.
5179 */
5180 ret = XML_ELEMENT_TYPE_EMPTY;
5181 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
5182 (NXT(2) == 'Y')) {
5183 SKIP(3);
5184 /*
5185 * Element is a generic container.
5186 */
5187 ret = XML_ELEMENT_TYPE_ANY;
5188 } else if (RAW == '(') {
5189 ret = xmlParseElementContentDecl(ctxt, name, &content);
5190 } else {
5191 /*
5192 * [ WFC: PEs in Internal Subset ] error handling.
5193 */
5194 if ((RAW == '%') && (ctxt->external == 0) &&
5195 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005196 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005197 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005198 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00005199 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00005200 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
5201 }
Owen Taylor3473f882001-02-23 17:55:21 +00005202 return(-1);
5203 }
5204
5205 SKIP_BLANKS;
5206 /*
5207 * Pop-up of finished entities.
5208 */
5209 while ((RAW == 0) && (ctxt->inputNr > 1))
5210 xmlPopInput(ctxt);
5211 SKIP_BLANKS;
5212
5213 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005214 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005215 if (content != NULL) {
5216 xmlFreeDocElementContent(ctxt->myDoc, content);
5217 }
Owen Taylor3473f882001-02-23 17:55:21 +00005218 } else {
5219 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005220 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5221 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005222 }
5223
5224 NEXT;
5225 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005226 (ctxt->sax->elementDecl != NULL)) {
5227 if (content != NULL)
5228 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005229 ctxt->sax->elementDecl(ctxt->userData, name, ret,
5230 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005231 if ((content != NULL) && (content->parent == NULL)) {
5232 /*
5233 * this is a trick: if xmlAddElementDecl is called,
5234 * instead of copying the full tree it is plugged directly
5235 * if called from the parser. Avoid duplicating the
5236 * interfaces or change the API/ABI
5237 */
5238 xmlFreeDocElementContent(ctxt->myDoc, content);
5239 }
5240 } else if (content != NULL) {
5241 xmlFreeDocElementContent(ctxt->myDoc, content);
5242 }
Owen Taylor3473f882001-02-23 17:55:21 +00005243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 }
5245 return(ret);
5246}
5247
5248/**
Owen Taylor3473f882001-02-23 17:55:21 +00005249 * xmlParseConditionalSections
5250 * @ctxt: an XML parser context
5251 *
5252 * [61] conditionalSect ::= includeSect | ignoreSect
5253 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
5254 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
5255 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
5256 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
5257 */
5258
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005259static void
Owen Taylor3473f882001-02-23 17:55:21 +00005260xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
5261 SKIP(3);
5262 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005263 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005264 SKIP(7);
5265 SKIP_BLANKS;
5266 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005267 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 } else {
5269 NEXT;
5270 }
5271 if (xmlParserDebugEntities) {
5272 if ((ctxt->input != NULL) && (ctxt->input->filename))
5273 xmlGenericError(xmlGenericErrorContext,
5274 "%s(%d): ", ctxt->input->filename,
5275 ctxt->input->line);
5276 xmlGenericError(xmlGenericErrorContext,
5277 "Entering INCLUDE Conditional Section\n");
5278 }
5279
5280 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
5281 (NXT(2) != '>'))) {
5282 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005283 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005284
5285 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5286 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005287 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005288 NEXT;
5289 } else if (RAW == '%') {
5290 xmlParsePEReference(ctxt);
5291 } else
5292 xmlParseMarkupDecl(ctxt);
5293
5294 /*
5295 * Pop-up of finished entities.
5296 */
5297 while ((RAW == 0) && (ctxt->inputNr > 1))
5298 xmlPopInput(ctxt);
5299
Daniel Veillardfdc91562002-07-01 21:52:03 +00005300 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005301 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005302 break;
5303 }
5304 }
5305 if (xmlParserDebugEntities) {
5306 if ((ctxt->input != NULL) && (ctxt->input->filename))
5307 xmlGenericError(xmlGenericErrorContext,
5308 "%s(%d): ", ctxt->input->filename,
5309 ctxt->input->line);
5310 xmlGenericError(xmlGenericErrorContext,
5311 "Leaving INCLUDE Conditional Section\n");
5312 }
5313
Daniel Veillarda07050d2003-10-19 14:46:32 +00005314 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005315 int state;
William M. Brack78637da2003-07-31 14:47:38 +00005316 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00005317 int depth = 0;
5318
5319 SKIP(6);
5320 SKIP_BLANKS;
5321 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005322 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 } else {
5324 NEXT;
5325 }
5326 if (xmlParserDebugEntities) {
5327 if ((ctxt->input != NULL) && (ctxt->input->filename))
5328 xmlGenericError(xmlGenericErrorContext,
5329 "%s(%d): ", ctxt->input->filename,
5330 ctxt->input->line);
5331 xmlGenericError(xmlGenericErrorContext,
5332 "Entering IGNORE Conditional Section\n");
5333 }
5334
5335 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005336 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00005337 * But disable SAX event generating DTD building in the meantime
5338 */
5339 state = ctxt->disableSAX;
5340 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00005341 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 ctxt->instate = XML_PARSER_IGNORE;
5343
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005344 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005345 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5346 depth++;
5347 SKIP(3);
5348 continue;
5349 }
5350 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
5351 if (--depth >= 0) SKIP(3);
5352 continue;
5353 }
5354 NEXT;
5355 continue;
5356 }
5357
5358 ctxt->disableSAX = state;
5359 ctxt->instate = instate;
5360
5361 if (xmlParserDebugEntities) {
5362 if ((ctxt->input != NULL) && (ctxt->input->filename))
5363 xmlGenericError(xmlGenericErrorContext,
5364 "%s(%d): ", ctxt->input->filename,
5365 ctxt->input->line);
5366 xmlGenericError(xmlGenericErrorContext,
5367 "Leaving IGNORE Conditional Section\n");
5368 }
5369
5370 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005371 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005372 }
5373
5374 if (RAW == 0)
5375 SHRINK;
5376
5377 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005378 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005379 } else {
5380 SKIP(3);
5381 }
5382}
5383
5384/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005385 * xmlParseMarkupDecl:
5386 * @ctxt: an XML parser context
5387 *
5388 * parse Markup declarations
5389 *
5390 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
5391 * NotationDecl | PI | Comment
5392 *
5393 * [ VC: Proper Declaration/PE Nesting ]
5394 * Parameter-entity replacement text must be properly nested with
5395 * markup declarations. That is to say, if either the first character
5396 * or the last character of a markup declaration (markupdecl above) is
5397 * contained in the replacement text for a parameter-entity reference,
5398 * both must be contained in the same replacement text.
5399 *
5400 * [ WFC: PEs in Internal Subset ]
5401 * In the internal DTD subset, parameter-entity references can occur
5402 * only where markup declarations can occur, not within markup declarations.
5403 * (This does not apply to references that occur in external parameter
5404 * entities or to the external subset.)
5405 */
5406void
5407xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
5408 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005409 if (CUR == '<') {
5410 if (NXT(1) == '!') {
5411 switch (NXT(2)) {
5412 case 'E':
5413 if (NXT(3) == 'L')
5414 xmlParseElementDecl(ctxt);
5415 else if (NXT(3) == 'N')
5416 xmlParseEntityDecl(ctxt);
5417 break;
5418 case 'A':
5419 xmlParseAttributeListDecl(ctxt);
5420 break;
5421 case 'N':
5422 xmlParseNotationDecl(ctxt);
5423 break;
5424 case '-':
5425 xmlParseComment(ctxt);
5426 break;
5427 default:
5428 /* there is an error but it will be detected later */
5429 break;
5430 }
5431 } else if (NXT(1) == '?') {
5432 xmlParsePI(ctxt);
5433 }
5434 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005435 /*
5436 * This is only for internal subset. On external entities,
5437 * the replacement is done before parsing stage
5438 */
5439 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
5440 xmlParsePEReference(ctxt);
5441
5442 /*
5443 * Conditional sections are allowed from entities included
5444 * by PE References in the internal subset.
5445 */
5446 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
5447 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5448 xmlParseConditionalSections(ctxt);
5449 }
5450 }
5451
5452 ctxt->instate = XML_PARSER_DTD;
5453}
5454
5455/**
5456 * xmlParseTextDecl:
5457 * @ctxt: an XML parser context
5458 *
5459 * parse an XML declaration header for external entities
5460 *
5461 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5462 *
5463 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5464 */
5465
5466void
5467xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5468 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005469 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005470
5471 /*
5472 * We know that '<?xml' is here.
5473 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005474 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005475 SKIP(5);
5476 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005477 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005478 return;
5479 }
5480
William M. Brack76e95df2003-10-18 16:20:14 +00005481 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005484 }
5485 SKIP_BLANKS;
5486
5487 /*
5488 * We may have the VersionInfo here.
5489 */
5490 version = xmlParseVersionInfo(ctxt);
5491 if (version == NULL)
5492 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005493 else {
William M. Brack76e95df2003-10-18 16:20:14 +00005494 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5496 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00005497 }
5498 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005499 ctxt->input->version = version;
5500
5501 /*
5502 * We must have the encoding declaration
5503 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005504 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005505 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5506 /*
5507 * The XML REC instructs us to stop parsing right here
5508 */
5509 return;
5510 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00005511 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
5512 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
5513 "Missing encoding in text declaration\n");
5514 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005515
5516 SKIP_BLANKS;
5517 if ((RAW == '?') && (NXT(1) == '>')) {
5518 SKIP(2);
5519 } else if (RAW == '>') {
5520 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005521 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005522 NEXT;
5523 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005524 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005525 MOVETO_ENDTAG(CUR_PTR);
5526 NEXT;
5527 }
5528}
5529
5530/**
Owen Taylor3473f882001-02-23 17:55:21 +00005531 * xmlParseExternalSubset:
5532 * @ctxt: an XML parser context
5533 * @ExternalID: the external identifier
5534 * @SystemID: the system identifier (or URL)
5535 *
5536 * parse Markup declarations from an external subset
5537 *
5538 * [30] extSubset ::= textDecl? extSubsetDecl
5539 *
5540 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5541 */
5542void
5543xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5544 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00005545 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005546 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005547 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005548 xmlParseTextDecl(ctxt);
5549 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5550 /*
5551 * The XML REC instructs us to stop parsing right here
5552 */
5553 ctxt->instate = XML_PARSER_EOF;
5554 return;
5555 }
5556 }
5557 if (ctxt->myDoc == NULL) {
5558 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5559 }
5560 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5561 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5562
5563 ctxt->instate = XML_PARSER_DTD;
5564 ctxt->external = 1;
5565 while (((RAW == '<') && (NXT(1) == '?')) ||
5566 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00005567 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005568 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00005569 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00005570
5571 GROW;
5572 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5573 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00005574 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005575 NEXT;
5576 } else if (RAW == '%') {
5577 xmlParsePEReference(ctxt);
5578 } else
5579 xmlParseMarkupDecl(ctxt);
5580
5581 /*
5582 * Pop-up of finished entities.
5583 */
5584 while ((RAW == 0) && (ctxt->inputNr > 1))
5585 xmlPopInput(ctxt);
5586
Daniel Veillardfdc91562002-07-01 21:52:03 +00005587 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005588 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005589 break;
5590 }
5591 }
5592
5593 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005594 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005595 }
5596
5597}
5598
5599/**
5600 * xmlParseReference:
5601 * @ctxt: an XML parser context
5602 *
5603 * parse and handle entity references in content, depending on the SAX
5604 * interface, this may end-up in a call to character() if this is a
5605 * CharRef, a predefined entity, if there is no reference() callback.
5606 * or if the parser was asked to switch to that mode.
5607 *
5608 * [67] Reference ::= EntityRef | CharRef
5609 */
5610void
5611xmlParseReference(xmlParserCtxtPtr ctxt) {
5612 xmlEntityPtr ent;
5613 xmlChar *val;
5614 if (RAW != '&') return;
5615
5616 if (NXT(1) == '#') {
5617 int i = 0;
5618 xmlChar out[10];
5619 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005620 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005621
5622 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5623 /*
5624 * So we are using non-UTF-8 buffers
5625 * Check that the char fit on 8bits, if not
5626 * generate a CharRef.
5627 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005628 if (value <= 0xFF) {
5629 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005630 out[1] = 0;
5631 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5632 (!ctxt->disableSAX))
5633 ctxt->sax->characters(ctxt->userData, out, 1);
5634 } else {
5635 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005636 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005637 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00005638 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005639 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5640 (!ctxt->disableSAX))
5641 ctxt->sax->reference(ctxt->userData, out);
5642 }
5643 } else {
5644 /*
5645 * Just encode the value in UTF-8
5646 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005647 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005648 out[i] = 0;
5649 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5650 (!ctxt->disableSAX))
5651 ctxt->sax->characters(ctxt->userData, out, i);
5652 }
5653 } else {
5654 ent = xmlParseEntityRef(ctxt);
5655 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005656 if (!ctxt->wellFormed)
5657 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005658 if ((ent->name != NULL) &&
5659 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5660 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00005661 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00005662
5663
5664 /*
5665 * The first reference to the entity trigger a parsing phase
5666 * where the ent->children is filled with the result from
5667 * the parsing.
5668 */
5669 if (ent->children == NULL) {
5670 xmlChar *value;
5671 value = ent->content;
5672
5673 /*
5674 * Check that this entity is well formed
5675 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00005676 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00005677 (value[1] == 0) && (value[0] == '<') &&
5678 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5679 /*
5680 * DONE: get definite answer on this !!!
5681 * Lots of entity decls are used to declare a single
5682 * char
5683 * <!ENTITY lt "<">
5684 * Which seems to be valid since
5685 * 2.4: The ampersand character (&) and the left angle
5686 * bracket (<) may appear in their literal form only
5687 * when used ... They are also legal within the literal
5688 * entity value of an internal entity declaration;i
5689 * see "4.3.2 Well-Formed Parsed Entities".
5690 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5691 * Looking at the OASIS test suite and James Clark
5692 * tests, this is broken. However the XML REC uses
5693 * it. Is the XML REC not well-formed ????
5694 * This is a hack to avoid this problem
5695 *
5696 * ANSWER: since lt gt amp .. are already defined,
5697 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005698 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005699 * is lousy but acceptable.
5700 */
5701 list = xmlNewDocText(ctxt->myDoc, value);
5702 if (list != NULL) {
5703 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5704 (ent->children == NULL)) {
5705 ent->children = list;
5706 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005707 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00005708 list->parent = (xmlNodePtr) ent;
5709 } else {
5710 xmlFreeNodeList(list);
5711 }
5712 } else if (list != NULL) {
5713 xmlFreeNodeList(list);
5714 }
5715 } else {
5716 /*
5717 * 4.3.2: An internal general parsed entity is well-formed
5718 * if its replacement text matches the production labeled
5719 * content.
5720 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005721
5722 void *user_data;
5723 /*
5724 * This is a bit hackish but this seems the best
5725 * way to make sure both SAX and DOM entity support
5726 * behaves okay.
5727 */
5728 if (ctxt->userData == ctxt)
5729 user_data = NULL;
5730 else
5731 user_data = ctxt->userData;
5732
Owen Taylor3473f882001-02-23 17:55:21 +00005733 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5734 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00005735 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
5736 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 ctxt->depth--;
5738 } else if (ent->etype ==
5739 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5740 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005741 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005742 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005743 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 ctxt->depth--;
5745 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00005746 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00005747 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
5748 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005749 }
5750 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005751 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005752 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00005753 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005754 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005756 (ent->children == NULL)) {
5757 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005758 if (ctxt->replaceEntities) {
5759 /*
5760 * Prune it directly in the generated document
5761 * except for single text nodes.
5762 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005763 if (((list->type == XML_TEXT_NODE) &&
5764 (list->next == NULL)) ||
5765 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00005766 list->parent = (xmlNodePtr) ent;
5767 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00005768 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005769 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005770 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005771 while (list != NULL) {
5772 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00005773 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005774 if (list->next == NULL)
5775 ent->last = list;
5776 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005777 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005778 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00005779#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005780 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5781 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00005782#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005783 }
5784 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00005785 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005786 while (list != NULL) {
5787 list->parent = (xmlNodePtr) ent;
5788 if (list->next == NULL)
5789 ent->last = list;
5790 list = list->next;
5791 }
Owen Taylor3473f882001-02-23 17:55:21 +00005792 }
5793 } else {
5794 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005795 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005796 }
William M. Brackb670e2e2003-09-27 01:05:55 +00005797 } else if ((ret != XML_ERR_OK) &&
5798 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005799 xmlFatalErr(ctxt, ret, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 } else if (list != NULL) {
5801 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005802 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 }
5805 }
5806 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5807 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5808 /*
5809 * Create a node.
5810 */
5811 ctxt->sax->reference(ctxt->userData, ent->name);
5812 return;
5813 } else if (ctxt->replaceEntities) {
William M. Brack1227fb32004-10-25 23:17:53 +00005814 /*
5815 * There is a problem on the handling of _private for entities
5816 * (bug 155816): Should we copy the content of the field from
5817 * the entity (possibly overwriting some value set by the user
5818 * when a copy is created), should we leave it alone, or should
5819 * we try to take care of different situations? The problem
5820 * is exacerbated by the usage of this field by the xmlReader.
5821 * To fix this bug, we look at _private on the created node
5822 * and, if it's NULL, we copy in whatever was in the entity.
5823 * If it's not NULL we leave it alone. This is somewhat of a
5824 * hack - maybe we should have further tests to determine
5825 * what to do.
5826 */
Owen Taylor3473f882001-02-23 17:55:21 +00005827 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5828 /*
5829 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005830 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005831 * In the first occurrence list contains the replacement.
5832 * progressive == 2 means we are operating on the Reader
5833 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00005834 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005835 if (((list == NULL) && (ent->owner == 0)) ||
5836 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005837 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005838
5839 /*
5840 * when operating on a reader, the entities definitions
5841 * are always owning the entities subtree.
5842 if (ctxt->parseMode == XML_PARSE_READER)
5843 ent->owner = 1;
5844 */
5845
Daniel Veillard62f313b2001-07-04 19:49:14 +00005846 cur = ent->children;
5847 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00005848 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005849 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005850 if (nw->_private == NULL)
5851 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00005852 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005853 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00005854 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005855 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00005856 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005857 if (cur == ent->last) {
5858 /*
5859 * needed to detect some strange empty
5860 * node cases in the reader tests
5861 */
5862 if ((ctxt->parseMode == XML_PARSE_READER) &&
5863 (nw->type == XML_ELEMENT_NODE) &&
5864 (nw->children == NULL))
5865 nw->extra = 1;
5866
Daniel Veillard62f313b2001-07-04 19:49:14 +00005867 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00005868 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005869 cur = cur->next;
5870 }
Daniel Veillard81273902003-09-30 00:43:48 +00005871#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00005872 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005873 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005874#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005875 } else if (list == NULL) {
5876 xmlNodePtr nw = NULL, cur, next, last,
5877 firstChild = NULL;
5878 /*
5879 * Copy the entity child list and make it the new
5880 * entity child list. The goal is to make sure any
5881 * ID or REF referenced will be the one from the
5882 * document content and not the entity copy.
5883 */
5884 cur = ent->children;
5885 ent->children = NULL;
5886 last = ent->last;
5887 ent->last = NULL;
5888 while (cur != NULL) {
5889 next = cur->next;
5890 cur->next = NULL;
5891 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00005892 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005893 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00005894 if (nw->_private == NULL)
5895 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005896 if (firstChild == NULL){
5897 firstChild = cur;
5898 }
5899 xmlAddChild((xmlNodePtr) ent, nw);
5900 xmlAddChild(ctxt->node, cur);
5901 }
5902 if (cur == last)
5903 break;
5904 cur = next;
5905 }
5906 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00005907#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00005908 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5909 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00005910#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005911 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005912 const xmlChar *nbktext;
5913
Daniel Veillard62f313b2001-07-04 19:49:14 +00005914 /*
5915 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005916 * node with a possible previous text one which
5917 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005918 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005919 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
5920 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005921 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005922 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005923 if ((ent->last != ent->children) &&
5924 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00005925 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005926 xmlAddChildList(ctxt->node, ent->children);
5927 }
5928
Owen Taylor3473f882001-02-23 17:55:21 +00005929 /*
5930 * This is to avoid a nasty side effect, see
5931 * characters() in SAX.c
5932 */
5933 ctxt->nodemem = 0;
5934 ctxt->nodelen = 0;
5935 return;
5936 } else {
5937 /*
5938 * Probably running in SAX mode
5939 */
5940 xmlParserInputPtr input;
5941
5942 input = xmlNewEntityInputStream(ctxt, ent);
5943 xmlPushInput(ctxt, input);
5944 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00005945 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
5946 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 xmlParseTextDecl(ctxt);
5948 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5949 /*
5950 * The XML REC instructs us to stop parsing right here
5951 */
5952 ctxt->instate = XML_PARSER_EOF;
5953 return;
5954 }
5955 if (input->standalone == 1) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005956 xmlFatalErr(ctxt, XML_ERR_EXT_ENTITY_STANDALONE,
5957 NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 }
5959 }
5960 return;
5961 }
5962 }
5963 } else {
5964 val = ent->content;
5965 if (val == NULL) return;
5966 /*
5967 * inline the entity.
5968 */
5969 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5970 (!ctxt->disableSAX))
5971 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5972 }
5973 }
5974}
5975
5976/**
5977 * xmlParseEntityRef:
5978 * @ctxt: an XML parser context
5979 *
5980 * parse ENTITY references declarations
5981 *
5982 * [68] EntityRef ::= '&' Name ';'
5983 *
5984 * [ WFC: Entity Declared ]
5985 * In a document without any DTD, a document with only an internal DTD
5986 * subset which contains no parameter entity references, or a document
5987 * with "standalone='yes'", the Name given in the entity reference
5988 * must match that in an entity declaration, except that well-formed
5989 * documents need not declare any of the following entities: amp, lt,
5990 * gt, apos, quot. The declaration of a parameter entity must precede
5991 * any reference to it. Similarly, the declaration of a general entity
5992 * must precede any reference to it which appears in a default value in an
5993 * attribute-list declaration. Note that if entities are declared in the
5994 * external subset or in external parameter entities, a non-validating
5995 * processor is not obligated to read and process their declarations;
5996 * for such documents, the rule that an entity must be declared is a
5997 * well-formedness constraint only if standalone='yes'.
5998 *
5999 * [ WFC: Parsed Entity ]
6000 * An entity reference must not contain the name of an unparsed entity
6001 *
6002 * Returns the xmlEntityPtr if found, or NULL otherwise.
6003 */
6004xmlEntityPtr
6005xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006006 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006007 xmlEntityPtr ent = NULL;
6008
6009 GROW;
6010
6011 if (RAW == '&') {
6012 NEXT;
6013 name = xmlParseName(ctxt);
6014 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006015 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6016 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006017 } else {
6018 if (RAW == ';') {
6019 NEXT;
6020 /*
6021 * Ask first SAX for entity resolution, otherwise try the
6022 * predefined set.
6023 */
6024 if (ctxt->sax != NULL) {
6025 if (ctxt->sax->getEntity != NULL)
6026 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006027 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006028 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006029 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6030 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006031 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006032 }
Owen Taylor3473f882001-02-23 17:55:21 +00006033 }
6034 /*
6035 * [ WFC: Entity Declared ]
6036 * In a document without any DTD, a document with only an
6037 * internal DTD subset which contains no parameter entity
6038 * references, or a document with "standalone='yes'", the
6039 * Name given in the entity reference must match that in an
6040 * entity declaration, except that well-formed documents
6041 * need not declare any of the following entities: amp, lt,
6042 * gt, apos, quot.
6043 * The declaration of a parameter entity must precede any
6044 * reference to it.
6045 * Similarly, the declaration of a general entity must
6046 * precede any reference to it which appears in a default
6047 * value in an attribute-list declaration. Note that if
6048 * entities are declared in the external subset or in
6049 * external parameter entities, a non-validating processor
6050 * is not obligated to read and process their declarations;
6051 * for such documents, the rule that an entity must be
6052 * declared is a well-formedness constraint only if
6053 * standalone='yes'.
6054 */
6055 if (ent == NULL) {
6056 if ((ctxt->standalone == 1) ||
6057 ((ctxt->hasExternalSubset == 0) &&
6058 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006059 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006060 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006062 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006063 "Entity '%s' not defined\n", name);
6064 }
Daniel Veillardf403d292003-10-05 13:51:35 +00006065 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00006066 }
6067
6068 /*
6069 * [ WFC: Parsed Entity ]
6070 * An entity reference must not contain the name of an
6071 * unparsed entity
6072 */
6073 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006074 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006075 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006076 }
6077
6078 /*
6079 * [ WFC: No External Entity References ]
6080 * Attribute values cannot contain direct or indirect
6081 * entity references to external entities.
6082 */
6083 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6084 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006085 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
6086 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006087 }
6088 /*
6089 * [ WFC: No < in Attribute Values ]
6090 * The replacement text of any entity referred to directly or
6091 * indirectly in an attribute value (other than "&lt;") must
6092 * not contain a <.
6093 */
6094 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6095 (ent != NULL) &&
6096 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6097 (ent->content != NULL) &&
6098 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006099 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00006100 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 }
6102
6103 /*
6104 * Internal check, no parameter entities here ...
6105 */
6106 else {
6107 switch (ent->etype) {
6108 case XML_INTERNAL_PARAMETER_ENTITY:
6109 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006110 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6111 "Attempt to reference the parameter entity '%s'\n",
6112 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 break;
6114 default:
6115 break;
6116 }
6117 }
6118
6119 /*
6120 * [ WFC: No Recursion ]
6121 * A parsed entity must not contain a recursive reference
6122 * to itself, either directly or indirectly.
6123 * Done somewhere else
6124 */
6125
6126 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006127 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006128 }
Owen Taylor3473f882001-02-23 17:55:21 +00006129 }
6130 }
6131 return(ent);
6132}
6133
6134/**
6135 * xmlParseStringEntityRef:
6136 * @ctxt: an XML parser context
6137 * @str: a pointer to an index in the string
6138 *
6139 * parse ENTITY references declarations, but this version parses it from
6140 * a string value.
6141 *
6142 * [68] EntityRef ::= '&' Name ';'
6143 *
6144 * [ WFC: Entity Declared ]
6145 * In a document without any DTD, a document with only an internal DTD
6146 * subset which contains no parameter entity references, or a document
6147 * with "standalone='yes'", the Name given in the entity reference
6148 * must match that in an entity declaration, except that well-formed
6149 * documents need not declare any of the following entities: amp, lt,
6150 * gt, apos, quot. The declaration of a parameter entity must precede
6151 * any reference to it. Similarly, the declaration of a general entity
6152 * must precede any reference to it which appears in a default value in an
6153 * attribute-list declaration. Note that if entities are declared in the
6154 * external subset or in external parameter entities, a non-validating
6155 * processor is not obligated to read and process their declarations;
6156 * for such documents, the rule that an entity must be declared is a
6157 * well-formedness constraint only if standalone='yes'.
6158 *
6159 * [ WFC: Parsed Entity ]
6160 * An entity reference must not contain the name of an unparsed entity
6161 *
6162 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
6163 * is updated to the current location in the string.
6164 */
6165xmlEntityPtr
6166xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
6167 xmlChar *name;
6168 const xmlChar *ptr;
6169 xmlChar cur;
6170 xmlEntityPtr ent = NULL;
6171
6172 if ((str == NULL) || (*str == NULL))
6173 return(NULL);
6174 ptr = *str;
6175 cur = *ptr;
6176 if (cur == '&') {
6177 ptr++;
6178 cur = *ptr;
6179 name = xmlParseStringName(ctxt, &ptr);
6180 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006181 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6182 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006183 } else {
6184 if (*ptr == ';') {
6185 ptr++;
6186 /*
6187 * Ask first SAX for entity resolution, otherwise try the
6188 * predefined set.
6189 */
6190 if (ctxt->sax != NULL) {
6191 if (ctxt->sax->getEntity != NULL)
6192 ent = ctxt->sax->getEntity(ctxt->userData, name);
6193 if (ent == NULL)
6194 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006195 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006196 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006197 }
Owen Taylor3473f882001-02-23 17:55:21 +00006198 }
6199 /*
6200 * [ WFC: Entity Declared ]
6201 * In a document without any DTD, a document with only an
6202 * internal DTD subset which contains no parameter entity
6203 * references, or a document with "standalone='yes'", the
6204 * Name given in the entity reference must match that in an
6205 * entity declaration, except that well-formed documents
6206 * need not declare any of the following entities: amp, lt,
6207 * gt, apos, quot.
6208 * The declaration of a parameter entity must precede any
6209 * reference to it.
6210 * Similarly, the declaration of a general entity must
6211 * precede any reference to it which appears in a default
6212 * value in an attribute-list declaration. Note that if
6213 * entities are declared in the external subset or in
6214 * external parameter entities, a non-validating processor
6215 * is not obligated to read and process their declarations;
6216 * for such documents, the rule that an entity must be
6217 * declared is a well-formedness constraint only if
6218 * standalone='yes'.
6219 */
6220 if (ent == NULL) {
6221 if ((ctxt->standalone == 1) ||
6222 ((ctxt->hasExternalSubset == 0) &&
6223 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006224 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006225 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006226 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006227 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00006228 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006229 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006230 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00006231 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00006232 }
6233
6234 /*
6235 * [ WFC: Parsed Entity ]
6236 * An entity reference must not contain the name of an
6237 * unparsed entity
6238 */
6239 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006240 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006241 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006242 }
6243
6244 /*
6245 * [ WFC: No External Entity References ]
6246 * Attribute values cannot contain direct or indirect
6247 * entity references to external entities.
6248 */
6249 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6250 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006251 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00006252 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006253 }
6254 /*
6255 * [ WFC: No < in Attribute Values ]
6256 * The replacement text of any entity referred to directly or
6257 * indirectly in an attribute value (other than "&lt;") must
6258 * not contain a <.
6259 */
6260 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
6261 (ent != NULL) &&
6262 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
6263 (ent->content != NULL) &&
6264 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006265 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
6266 "'<' in entity '%s' is not allowed in attributes values\n",
6267 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 }
6269
6270 /*
6271 * Internal check, no parameter entities here ...
6272 */
6273 else {
6274 switch (ent->etype) {
6275 case XML_INTERNAL_PARAMETER_ENTITY:
6276 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00006277 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
6278 "Attempt to reference the parameter entity '%s'\n",
6279 name);
Owen Taylor3473f882001-02-23 17:55:21 +00006280 break;
6281 default:
6282 break;
6283 }
6284 }
6285
6286 /*
6287 * [ WFC: No Recursion ]
6288 * A parsed entity must not contain a recursive reference
6289 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006290 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00006291 */
6292
6293 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006294 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006295 }
6296 xmlFree(name);
6297 }
6298 }
6299 *str = ptr;
6300 return(ent);
6301}
6302
6303/**
6304 * xmlParsePEReference:
6305 * @ctxt: an XML parser context
6306 *
6307 * parse PEReference declarations
6308 * The entity content is handled directly by pushing it's content as
6309 * a new input stream.
6310 *
6311 * [69] PEReference ::= '%' Name ';'
6312 *
6313 * [ WFC: No Recursion ]
6314 * A parsed entity must not contain a recursive
6315 * reference to itself, either directly or indirectly.
6316 *
6317 * [ WFC: Entity Declared ]
6318 * In a document without any DTD, a document with only an internal DTD
6319 * subset which contains no parameter entity references, or a document
6320 * with "standalone='yes'", ... ... The declaration of a parameter
6321 * entity must precede any reference to it...
6322 *
6323 * [ VC: Entity Declared ]
6324 * In a document with an external subset or external parameter entities
6325 * with "standalone='no'", ... ... The declaration of a parameter entity
6326 * must precede any reference to it...
6327 *
6328 * [ WFC: In DTD ]
6329 * Parameter-entity references may only appear in the DTD.
6330 * NOTE: misleading but this is handled.
6331 */
6332void
Daniel Veillard8f597c32003-10-06 08:19:27 +00006333xmlParsePEReference(xmlParserCtxtPtr ctxt)
6334{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006335 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006336 xmlEntityPtr entity = NULL;
6337 xmlParserInputPtr input;
6338
6339 if (RAW == '%') {
6340 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006341 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00006342 if (name == NULL) {
6343 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6344 "xmlParsePEReference: no name\n");
6345 } else {
6346 if (RAW == ';') {
6347 NEXT;
6348 if ((ctxt->sax != NULL) &&
6349 (ctxt->sax->getParameterEntity != NULL))
6350 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6351 name);
6352 if (entity == NULL) {
6353 /*
6354 * [ WFC: Entity Declared ]
6355 * In a document without any DTD, a document with only an
6356 * internal DTD subset which contains no parameter entity
6357 * references, or a document with "standalone='yes'", ...
6358 * ... The declaration of a parameter entity must precede
6359 * any reference to it...
6360 */
6361 if ((ctxt->standalone == 1) ||
6362 ((ctxt->hasExternalSubset == 0) &&
6363 (ctxt->hasPErefs == 0))) {
6364 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6365 "PEReference: %%%s; not found\n",
6366 name);
6367 } else {
6368 /*
6369 * [ VC: Entity Declared ]
6370 * In a document with an external subset or external
6371 * parameter entities with "standalone='no'", ...
6372 * ... The declaration of a parameter entity must
6373 * precede any reference to it...
6374 */
6375 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6376 "PEReference: %%%s; not found\n",
6377 name, NULL);
6378 ctxt->valid = 0;
6379 }
6380 } else {
6381 /*
6382 * Internal checking in case the entity quest barfed
6383 */
6384 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6385 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6386 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6387 "Internal: %%%s; is not a parameter entity\n",
6388 name, NULL);
6389 } else if (ctxt->input->free != deallocblankswrapper) {
6390 input =
6391 xmlNewBlanksWrapperInputStream(ctxt, entity);
6392 xmlPushInput(ctxt, input);
6393 } else {
6394 /*
6395 * TODO !!!
6396 * handle the extra spaces added before and after
6397 * c.f. http://www.w3.org/TR/REC-xml#as-PE
6398 */
6399 input = xmlNewEntityInputStream(ctxt, entity);
6400 xmlPushInput(ctxt, input);
6401 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00006402 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00006403 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00006404 xmlParseTextDecl(ctxt);
6405 if (ctxt->errNo ==
6406 XML_ERR_UNSUPPORTED_ENCODING) {
6407 /*
6408 * The XML REC instructs us to stop parsing
6409 * right here
6410 */
6411 ctxt->instate = XML_PARSER_EOF;
6412 return;
6413 }
6414 }
6415 }
6416 }
6417 ctxt->hasPErefs = 1;
6418 } else {
6419 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
6420 }
6421 }
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
6423}
6424
6425/**
6426 * xmlParseStringPEReference:
6427 * @ctxt: an XML parser context
6428 * @str: a pointer to an index in the string
6429 *
6430 * parse PEReference declarations
6431 *
6432 * [69] PEReference ::= '%' Name ';'
6433 *
6434 * [ WFC: No Recursion ]
6435 * A parsed entity must not contain a recursive
6436 * reference to itself, either directly or indirectly.
6437 *
6438 * [ WFC: Entity Declared ]
6439 * In a document without any DTD, a document with only an internal DTD
6440 * subset which contains no parameter entity references, or a document
6441 * with "standalone='yes'", ... ... The declaration of a parameter
6442 * entity must precede any reference to it...
6443 *
6444 * [ VC: Entity Declared ]
6445 * In a document with an external subset or external parameter entities
6446 * with "standalone='no'", ... ... The declaration of a parameter entity
6447 * must precede any reference to it...
6448 *
6449 * [ WFC: In DTD ]
6450 * Parameter-entity references may only appear in the DTD.
6451 * NOTE: misleading but this is handled.
6452 *
6453 * Returns the string of the entity content.
6454 * str is updated to the current value of the index
6455 */
6456xmlEntityPtr
6457xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6458 const xmlChar *ptr;
6459 xmlChar cur;
6460 xmlChar *name;
6461 xmlEntityPtr entity = NULL;
6462
6463 if ((str == NULL) || (*str == NULL)) return(NULL);
6464 ptr = *str;
6465 cur = *ptr;
6466 if (cur == '%') {
6467 ptr++;
6468 cur = *ptr;
6469 name = xmlParseStringName(ctxt, &ptr);
6470 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006471 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6472 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006473 } else {
6474 cur = *ptr;
6475 if (cur == ';') {
6476 ptr++;
6477 cur = *ptr;
6478 if ((ctxt->sax != NULL) &&
6479 (ctxt->sax->getParameterEntity != NULL))
6480 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6481 name);
6482 if (entity == NULL) {
6483 /*
6484 * [ WFC: Entity Declared ]
6485 * In a document without any DTD, a document with only an
6486 * internal DTD subset which contains no parameter entity
6487 * references, or a document with "standalone='yes'", ...
6488 * ... The declaration of a parameter entity must precede
6489 * any reference to it...
6490 */
6491 if ((ctxt->standalone == 1) ||
6492 ((ctxt->hasExternalSubset == 0) &&
6493 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006494 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00006495 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006496 } else {
6497 /*
6498 * [ VC: Entity Declared ]
6499 * In a document with an external subset or external
6500 * parameter entities with "standalone='no'", ...
6501 * ... The declaration of a parameter entity must
6502 * precede any reference to it...
6503 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00006504 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6505 "PEReference: %%%s; not found\n",
6506 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006507 ctxt->valid = 0;
6508 }
6509 } else {
6510 /*
6511 * Internal checking in case the entity quest barfed
6512 */
6513 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6514 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006515 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
6516 "%%%s; is not a parameter entity\n",
6517 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006518 }
6519 }
6520 ctxt->hasPErefs = 1;
6521 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006522 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006523 }
6524 xmlFree(name);
6525 }
6526 }
6527 *str = ptr;
6528 return(entity);
6529}
6530
6531/**
6532 * xmlParseDocTypeDecl:
6533 * @ctxt: an XML parser context
6534 *
6535 * parse a DOCTYPE declaration
6536 *
6537 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6538 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6539 *
6540 * [ VC: Root Element Type ]
6541 * The Name in the document type declaration must match the element
6542 * type of the root element.
6543 */
6544
6545void
6546xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006547 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006548 xmlChar *ExternalID = NULL;
6549 xmlChar *URI = NULL;
6550
6551 /*
6552 * We know that '<!DOCTYPE' has been detected.
6553 */
6554 SKIP(9);
6555
6556 SKIP_BLANKS;
6557
6558 /*
6559 * Parse the DOCTYPE name.
6560 */
6561 name = xmlParseName(ctxt);
6562 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006563 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6564 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006565 }
6566 ctxt->intSubName = name;
6567
6568 SKIP_BLANKS;
6569
6570 /*
6571 * Check for SystemID and ExternalID
6572 */
6573 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6574
6575 if ((URI != NULL) || (ExternalID != NULL)) {
6576 ctxt->hasExternalSubset = 1;
6577 }
6578 ctxt->extSubURI = URI;
6579 ctxt->extSubSystem = ExternalID;
6580
6581 SKIP_BLANKS;
6582
6583 /*
6584 * Create and update the internal subset.
6585 */
6586 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6587 (!ctxt->disableSAX))
6588 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6589
6590 /*
6591 * Is there any internal subset declarations ?
6592 * they are handled separately in xmlParseInternalSubset()
6593 */
6594 if (RAW == '[')
6595 return;
6596
6597 /*
6598 * We should be at the end of the DOCTYPE declaration.
6599 */
6600 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006601 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 NEXT;
6604}
6605
6606/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006607 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006608 * @ctxt: an XML parser context
6609 *
6610 * parse the internal subset declaration
6611 *
6612 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6613 */
6614
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006615static void
Owen Taylor3473f882001-02-23 17:55:21 +00006616xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6617 /*
6618 * Is there any DTD definition ?
6619 */
6620 if (RAW == '[') {
6621 ctxt->instate = XML_PARSER_DTD;
6622 NEXT;
6623 /*
6624 * Parse the succession of Markup declarations and
6625 * PEReferences.
6626 * Subsequence (markupdecl | PEReference | S)*
6627 */
6628 while (RAW != ']') {
6629 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006630 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006631
6632 SKIP_BLANKS;
6633 xmlParseMarkupDecl(ctxt);
6634 xmlParsePEReference(ctxt);
6635
6636 /*
6637 * Pop-up of finished entities.
6638 */
6639 while ((RAW == 0) && (ctxt->inputNr > 1))
6640 xmlPopInput(ctxt);
6641
6642 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006643 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00006644 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006645 break;
6646 }
6647 }
6648 if (RAW == ']') {
6649 NEXT;
6650 SKIP_BLANKS;
6651 }
6652 }
6653
6654 /*
6655 * We should be at the end of the DOCTYPE declaration.
6656 */
6657 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006658 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006659 }
6660 NEXT;
6661}
6662
Daniel Veillard81273902003-09-30 00:43:48 +00006663#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00006664/**
6665 * xmlParseAttribute:
6666 * @ctxt: an XML parser context
6667 * @value: a xmlChar ** used to store the value of the attribute
6668 *
6669 * parse an attribute
6670 *
6671 * [41] Attribute ::= Name Eq AttValue
6672 *
6673 * [ WFC: No External Entity References ]
6674 * Attribute values cannot contain direct or indirect entity references
6675 * to external entities.
6676 *
6677 * [ WFC: No < in Attribute Values ]
6678 * The replacement text of any entity referred to directly or indirectly in
6679 * an attribute value (other than "&lt;") must not contain a <.
6680 *
6681 * [ VC: Attribute Value Type ]
6682 * The attribute must have been declared; the value must be of the type
6683 * declared for it.
6684 *
6685 * [25] Eq ::= S? '=' S?
6686 *
6687 * With namespace:
6688 *
6689 * [NS 11] Attribute ::= QName Eq AttValue
6690 *
6691 * Also the case QName == xmlns:??? is handled independently as a namespace
6692 * definition.
6693 *
6694 * Returns the attribute name, and the value in *value.
6695 */
6696
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006697const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006698xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006699 const xmlChar *name;
6700 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00006701
6702 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006703 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 name = xmlParseName(ctxt);
6705 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006706 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006707 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006708 return(NULL);
6709 }
6710
6711 /*
6712 * read the value
6713 */
6714 SKIP_BLANKS;
6715 if (RAW == '=') {
6716 NEXT;
6717 SKIP_BLANKS;
6718 val = xmlParseAttValue(ctxt);
6719 ctxt->instate = XML_PARSER_CONTENT;
6720 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006721 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00006722 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006723 return(NULL);
6724 }
6725
6726 /*
6727 * Check that xml:lang conforms to the specification
6728 * No more registered as an error, just generate a warning now
6729 * since this was deprecated in XML second edition
6730 */
6731 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6732 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006733 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
6734 "Malformed value for xml:lang : %s\n",
6735 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006736 }
6737 }
6738
6739 /*
6740 * Check that xml:space conforms to the specification
6741 */
6742 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6743 if (xmlStrEqual(val, BAD_CAST "default"))
6744 *(ctxt->space) = 0;
6745 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6746 *(ctxt->space) = 1;
6747 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006748 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00006749"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Owen Taylor3473f882001-02-23 17:55:21 +00006750 val);
Owen Taylor3473f882001-02-23 17:55:21 +00006751 }
6752 }
6753
6754 *value = val;
6755 return(name);
6756}
6757
6758/**
6759 * xmlParseStartTag:
6760 * @ctxt: an XML parser context
6761 *
6762 * parse a start of tag either for rule element or
6763 * EmptyElement. In both case we don't parse the tag closing chars.
6764 *
6765 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6766 *
6767 * [ WFC: Unique Att Spec ]
6768 * No attribute name may appear more than once in the same start-tag or
6769 * empty-element tag.
6770 *
6771 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6772 *
6773 * [ WFC: Unique Att Spec ]
6774 * No attribute name may appear more than once in the same start-tag or
6775 * empty-element tag.
6776 *
6777 * With namespace:
6778 *
6779 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6780 *
6781 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6782 *
6783 * Returns the element name parsed
6784 */
6785
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006786const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00006787xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006788 const xmlChar *name;
6789 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00006790 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006791 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00006792 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006793 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006794 int i;
6795
6796 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006797 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006798
6799 name = xmlParseName(ctxt);
6800 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006801 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006802 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006803 return(NULL);
6804 }
6805
6806 /*
6807 * Now parse the attributes, it ends up with the ending
6808 *
6809 * (S Attribute)* S?
6810 */
6811 SKIP_BLANKS;
6812 GROW;
6813
Daniel Veillard21a0f912001-02-25 19:54:14 +00006814 while ((RAW != '>') &&
6815 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00006816 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006818 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006819
6820 attname = xmlParseAttribute(ctxt, &attvalue);
6821 if ((attname != NULL) && (attvalue != NULL)) {
6822 /*
6823 * [ WFC: Unique Att Spec ]
6824 * No attribute name may appear more than once in the same
6825 * start-tag or empty-element tag.
6826 */
6827 for (i = 0; i < nbatts;i += 2) {
6828 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006829 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00006830 xmlFree(attvalue);
6831 goto failed;
6832 }
6833 }
Owen Taylor3473f882001-02-23 17:55:21 +00006834 /*
6835 * Add the pair to atts
6836 */
6837 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006838 maxatts = 22; /* allow for 10 attrs by default */
6839 atts = (const xmlChar **)
6840 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00006841 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006842 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006843 if (attvalue != NULL)
6844 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006845 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006846 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006847 ctxt->atts = atts;
6848 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006849 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006850 const xmlChar **n;
6851
Owen Taylor3473f882001-02-23 17:55:21 +00006852 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006853 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006854 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006855 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006856 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006857 if (attvalue != NULL)
6858 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006859 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00006860 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006861 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006862 ctxt->atts = atts;
6863 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00006864 }
6865 atts[nbatts++] = attname;
6866 atts[nbatts++] = attvalue;
6867 atts[nbatts] = NULL;
6868 atts[nbatts + 1] = NULL;
6869 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00006870 if (attvalue != NULL)
6871 xmlFree(attvalue);
6872 }
6873
6874failed:
6875
Daniel Veillard3772de32002-12-17 10:31:45 +00006876 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00006877 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6878 break;
William M. Brack76e95df2003-10-18 16:20:14 +00006879 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006880 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6881 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006882 }
6883 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00006884 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
6885 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006886 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
6887 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006888 break;
6889 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006890 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00006891 GROW;
6892 }
6893
6894 /*
6895 * SAX: Start of Element !
6896 */
6897 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00006898 (!ctxt->disableSAX)) {
6899 if (nbatts > 0)
6900 ctxt->sax->startElement(ctxt->userData, name, atts);
6901 else
6902 ctxt->sax->startElement(ctxt->userData, name, NULL);
6903 }
Owen Taylor3473f882001-02-23 17:55:21 +00006904
6905 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006906 /* Free only the content strings */
6907 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006908 if (atts[i] != NULL)
6909 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00006910 }
6911 return(name);
6912}
6913
6914/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00006915 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00006916 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00006917 * @line: line of the start tag
6918 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00006919 *
6920 * parse an end of tag
6921 *
6922 * [42] ETag ::= '</' Name S? '>'
6923 *
6924 * With namespace
6925 *
6926 * [NS 9] ETag ::= '</' QName S? '>'
6927 */
6928
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006929static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00006930xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006931 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006932
6933 GROW;
6934 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006935 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006936 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006937 return;
6938 }
6939 SKIP(2);
6940
Daniel Veillard46de64e2002-05-29 08:21:33 +00006941 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006942
6943 /*
6944 * We should definitely be at the ending "S? '>'" part
6945 */
6946 GROW;
6947 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00006948 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006949 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006950 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006951 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006952
6953 /*
6954 * [ WFC: Element Type Match ]
6955 * The Name in an element's end-tag must match the element type in the
6956 * start-tag.
6957 *
6958 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00006959 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006960 if (name == NULL) name = BAD_CAST "unparseable";
6961 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006962 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00006963 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00006964 }
6965
6966 /*
6967 * SAX: End of Tag
6968 */
6969 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6970 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00006971 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006972
Daniel Veillarde57ec792003-09-10 10:50:59 +00006973 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006974 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006975 return;
6976}
6977
6978/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00006979 * xmlParseEndTag:
6980 * @ctxt: an XML parser context
6981 *
6982 * parse an end of tag
6983 *
6984 * [42] ETag ::= '</' Name S? '>'
6985 *
6986 * With namespace
6987 *
6988 * [NS 9] ETag ::= '</' QName S? '>'
6989 */
6990
6991void
6992xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00006993 xmlParseEndTag1(ctxt, 0);
6994}
Daniel Veillard81273902003-09-30 00:43:48 +00006995#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00006996
6997/************************************************************************
6998 * *
6999 * SAX 2 specific operations *
7000 * *
7001 ************************************************************************/
7002
7003static const xmlChar *
7004xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
7005 int len = 0, l;
7006 int c;
7007 int count = 0;
7008
7009 /*
7010 * Handler for more complex cases
7011 */
7012 GROW;
7013 c = CUR_CHAR(l);
7014 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007015 (!IS_LETTER(c) && (c != '_'))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007016 return(NULL);
7017 }
7018
7019 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
William M. Brack871611b2003-10-18 04:53:14 +00007020 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007021 (c == '.') || (c == '-') || (c == '_') ||
William M. Brack871611b2003-10-18 04:53:14 +00007022 (IS_COMBINING(c)) ||
7023 (IS_EXTENDER(c)))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007024 if (count++ > 100) {
7025 count = 0;
7026 GROW;
7027 }
7028 len += l;
7029 NEXTL(l);
7030 c = CUR_CHAR(l);
7031 }
7032 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
7033}
7034
7035/*
7036 * xmlGetNamespace:
7037 * @ctxt: an XML parser context
7038 * @prefix: the prefix to lookup
7039 *
7040 * Lookup the namespace name for the @prefix (which ca be NULL)
7041 * The prefix must come from the @ctxt->dict dictionnary
7042 *
7043 * Returns the namespace name or NULL if not bound
7044 */
7045static const xmlChar *
7046xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
7047 int i;
7048
Daniel Veillarde57ec792003-09-10 10:50:59 +00007049 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007050 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007051 if (ctxt->nsTab[i] == prefix) {
7052 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
7053 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007054 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007055 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007056 return(NULL);
7057}
7058
7059/**
7060 * xmlParseNCName:
7061 * @ctxt: an XML parser context
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007062 * @len: lenght of the string parsed
Daniel Veillard0fb18932003-09-07 09:14:37 +00007063 *
7064 * parse an XML name.
7065 *
7066 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
7067 * CombiningChar | Extender
7068 *
7069 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
7070 *
7071 * Returns the Name parsed or NULL
7072 */
7073
7074static const xmlChar *
7075xmlParseNCName(xmlParserCtxtPtr ctxt) {
7076 const xmlChar *in;
7077 const xmlChar *ret;
7078 int count = 0;
7079
7080 /*
7081 * Accelerator for simple ASCII names
7082 */
7083 in = ctxt->input->cur;
7084 if (((*in >= 0x61) && (*in <= 0x7A)) ||
7085 ((*in >= 0x41) && (*in <= 0x5A)) ||
7086 (*in == '_')) {
7087 in++;
7088 while (((*in >= 0x61) && (*in <= 0x7A)) ||
7089 ((*in >= 0x41) && (*in <= 0x5A)) ||
7090 ((*in >= 0x30) && (*in <= 0x39)) ||
7091 (*in == '_') || (*in == '-') ||
7092 (*in == '.'))
7093 in++;
7094 if ((*in > 0) && (*in < 0x80)) {
7095 count = in - ctxt->input->cur;
7096 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
7097 ctxt->input->cur = in;
7098 ctxt->nbChars += count;
7099 ctxt->input->col += count;
7100 if (ret == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007101 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007102 }
7103 return(ret);
7104 }
7105 }
7106 return(xmlParseNCNameComplex(ctxt));
7107}
7108
7109/**
7110 * xmlParseQName:
7111 * @ctxt: an XML parser context
7112 * @prefix: pointer to store the prefix part
7113 *
7114 * parse an XML Namespace QName
7115 *
7116 * [6] QName ::= (Prefix ':')? LocalPart
7117 * [7] Prefix ::= NCName
7118 * [8] LocalPart ::= NCName
7119 *
7120 * Returns the Name parsed or NULL
7121 */
7122
7123static const xmlChar *
7124xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
7125 const xmlChar *l, *p;
7126
7127 GROW;
7128
7129 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007130 if (l == NULL) {
7131 if (CUR == ':') {
7132 l = xmlParseName(ctxt);
7133 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007134 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7135 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007136 *prefix = NULL;
7137 return(l);
7138 }
7139 }
7140 return(NULL);
7141 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007142 if (CUR == ':') {
7143 NEXT;
7144 p = l;
7145 l = xmlParseNCName(ctxt);
7146 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007147 xmlChar *tmp;
7148
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007149 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7150 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007151 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
7152 p = xmlDictLookup(ctxt->dict, tmp, -1);
7153 if (tmp != NULL) xmlFree(tmp);
7154 *prefix = NULL;
7155 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007156 }
7157 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007158 xmlChar *tmp;
7159
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007160 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
7161 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007162 NEXT;
7163 tmp = (xmlChar *) xmlParseName(ctxt);
7164 if (tmp != NULL) {
7165 tmp = xmlBuildQName(tmp, l, NULL, 0);
7166 l = xmlDictLookup(ctxt->dict, tmp, -1);
7167 if (tmp != NULL) xmlFree(tmp);
7168 *prefix = p;
7169 return(l);
7170 }
7171 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
7172 l = xmlDictLookup(ctxt->dict, tmp, -1);
7173 if (tmp != NULL) xmlFree(tmp);
7174 *prefix = p;
7175 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007176 }
7177 *prefix = p;
7178 } else
7179 *prefix = NULL;
7180 return(l);
7181}
7182
7183/**
7184 * xmlParseQNameAndCompare:
7185 * @ctxt: an XML parser context
7186 * @name: the localname
7187 * @prefix: the prefix, if any.
7188 *
7189 * parse an XML name and compares for match
7190 * (specialized for endtag parsing)
7191 *
7192 * Returns NULL for an illegal name, (xmlChar*) 1 for success
7193 * and the name for mismatch
7194 */
7195
7196static const xmlChar *
7197xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
7198 xmlChar const *prefix) {
7199 const xmlChar *cmp = name;
7200 const xmlChar *in;
7201 const xmlChar *ret;
7202 const xmlChar *prefix2;
7203
7204 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
7205
7206 GROW;
7207 in = ctxt->input->cur;
7208
7209 cmp = prefix;
7210 while (*in != 0 && *in == *cmp) {
7211 ++in;
7212 ++cmp;
7213 }
7214 if ((*cmp == 0) && (*in == ':')) {
7215 in++;
7216 cmp = name;
7217 while (*in != 0 && *in == *cmp) {
7218 ++in;
7219 ++cmp;
7220 }
William M. Brack76e95df2003-10-18 16:20:14 +00007221 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007222 /* success */
7223 ctxt->input->cur = in;
7224 return((const xmlChar*) 1);
7225 }
7226 }
7227 /*
7228 * all strings coms from the dictionary, equality can be done directly
7229 */
7230 ret = xmlParseQName (ctxt, &prefix2);
7231 if ((ret == name) && (prefix == prefix2))
7232 return((const xmlChar*) 1);
7233 return ret;
7234}
7235
7236/**
7237 * xmlParseAttValueInternal:
7238 * @ctxt: an XML parser context
7239 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007240 * @alloc: whether the attribute was reallocated as a new string
7241 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00007242 *
7243 * parse a value for an attribute.
7244 * NOTE: if no normalization is needed, the routine will return pointers
7245 * directly from the data buffer.
7246 *
7247 * 3.3.3 Attribute-Value Normalization:
7248 * Before the value of an attribute is passed to the application or
7249 * checked for validity, the XML processor must normalize it as follows:
7250 * - a character reference is processed by appending the referenced
7251 * character to the attribute value
7252 * - an entity reference is processed by recursively processing the
7253 * replacement text of the entity
7254 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
7255 * appending #x20 to the normalized value, except that only a single
7256 * #x20 is appended for a "#xD#xA" sequence that is part of an external
7257 * parsed entity or the literal entity value of an internal parsed entity
7258 * - other characters are processed by appending them to the normalized value
7259 * If the declared value is not CDATA, then the XML processor must further
7260 * process the normalized attribute value by discarding any leading and
7261 * trailing space (#x20) characters, and by replacing sequences of space
7262 * (#x20) characters by a single space (#x20) character.
7263 * All attributes for which no declaration has been read should be treated
7264 * by a non-validating parser as if declared CDATA.
7265 *
7266 * Returns the AttValue parsed or NULL. The value has to be freed by the
7267 * caller if it was copied, this can be detected by val[*len] == 0.
7268 */
7269
7270static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007271xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
7272 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00007273{
Daniel Veillard0fb18932003-09-07 09:14:37 +00007274 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007275 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007276 xmlChar *ret = NULL;
7277
7278 GROW;
7279 in = (xmlChar *) CUR_PTR;
7280 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007281 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007282 return (NULL);
7283 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007284 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007285
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007286 /*
7287 * try to handle in this routine the most common case where no
7288 * allocation of a new string is required and where content is
7289 * pure ASCII.
7290 */
7291 limit = *in++;
7292 end = ctxt->input->end;
7293 start = in;
7294 if (in >= end) {
7295 const xmlChar *oldbase = ctxt->input->base;
7296 GROW;
7297 if (oldbase != ctxt->input->base) {
7298 long delta = ctxt->input->base - oldbase;
7299 start = start + delta;
7300 in = in + delta;
7301 }
7302 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007303 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007304 if (normalize) {
7305 /*
7306 * Skip any leading spaces
7307 */
7308 while ((in < end) && (*in != limit) &&
7309 ((*in == 0x20) || (*in == 0x9) ||
7310 (*in == 0xA) || (*in == 0xD))) {
7311 in++;
7312 start = in;
7313 if (in >= end) {
7314 const xmlChar *oldbase = ctxt->input->base;
7315 GROW;
7316 if (oldbase != ctxt->input->base) {
7317 long delta = ctxt->input->base - oldbase;
7318 start = start + delta;
7319 in = in + delta;
7320 }
7321 end = ctxt->input->end;
7322 }
7323 }
7324 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7325 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7326 if ((*in++ == 0x20) && (*in == 0x20)) break;
7327 if (in >= end) {
7328 const xmlChar *oldbase = ctxt->input->base;
7329 GROW;
7330 if (oldbase != ctxt->input->base) {
7331 long delta = ctxt->input->base - oldbase;
7332 start = start + delta;
7333 in = in + delta;
7334 }
7335 end = ctxt->input->end;
7336 }
7337 }
7338 last = in;
7339 /*
7340 * skip the trailing blanks
7341 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00007342 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007343 while ((in < end) && (*in != limit) &&
7344 ((*in == 0x20) || (*in == 0x9) ||
7345 (*in == 0xA) || (*in == 0xD))) {
7346 in++;
7347 if (in >= end) {
7348 const xmlChar *oldbase = ctxt->input->base;
7349 GROW;
7350 if (oldbase != ctxt->input->base) {
7351 long delta = ctxt->input->base - oldbase;
7352 start = start + delta;
7353 in = in + delta;
7354 last = last + delta;
7355 }
7356 end = ctxt->input->end;
7357 }
7358 }
7359 if (*in != limit) goto need_complex;
7360 } else {
7361 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
7362 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
7363 in++;
7364 if (in >= end) {
7365 const xmlChar *oldbase = ctxt->input->base;
7366 GROW;
7367 if (oldbase != ctxt->input->base) {
7368 long delta = ctxt->input->base - oldbase;
7369 start = start + delta;
7370 in = in + delta;
7371 }
7372 end = ctxt->input->end;
7373 }
7374 }
7375 last = in;
7376 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007377 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007378 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007379 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007380 *len = last - start;
7381 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007382 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007383 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007384 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007385 }
7386 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007387 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007388 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007389need_complex:
7390 if (alloc) *alloc = 1;
7391 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007392}
7393
7394/**
7395 * xmlParseAttribute2:
7396 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007397 * @pref: the element prefix
7398 * @elem: the element name
7399 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00007400 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007401 * @len: an int * to save the length of the attribute
7402 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00007403 *
7404 * parse an attribute in the new SAX2 framework.
7405 *
7406 * Returns the attribute name, and the value in *value, .
7407 */
7408
7409static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007410xmlParseAttribute2(xmlParserCtxtPtr ctxt,
7411 const xmlChar *pref, const xmlChar *elem,
7412 const xmlChar **prefix, xmlChar **value,
7413 int *len, int *alloc) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007414 const xmlChar *name;
7415 xmlChar *val;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007416 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007417
7418 *value = NULL;
7419 GROW;
7420 name = xmlParseQName(ctxt, prefix);
7421 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007422 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7423 "error parsing attribute name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007424 return(NULL);
7425 }
7426
7427 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007428 * get the type if needed
7429 */
7430 if (ctxt->attsSpecial != NULL) {
7431 int type;
7432
7433 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
7434 pref, elem, *prefix, name);
7435 if (type != 0) normalize = 1;
7436 }
7437
7438 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00007439 * read the value
7440 */
7441 SKIP_BLANKS;
7442 if (RAW == '=') {
7443 NEXT;
7444 SKIP_BLANKS;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007445 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007446 ctxt->instate = XML_PARSER_CONTENT;
7447 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007448 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007449 "Specification mandate value for attribute %s\n", name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007450 return(NULL);
7451 }
7452
7453 /*
7454 * Check that xml:lang conforms to the specification
7455 * No more registered as an error, just generate a warning now
7456 * since this was deprecated in XML second edition
7457 */
7458 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7459 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007460 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7461 "Malformed value for xml:lang : %s\n",
7462 val, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007463 }
7464 }
7465
7466 /*
7467 * Check that xml:space conforms to the specification
7468 */
7469 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7470 if (xmlStrEqual(val, BAD_CAST "default"))
7471 *(ctxt->space) = 0;
7472 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7473 *(ctxt->space) = 1;
7474 else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007475 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007476"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
7477 val);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007478 }
7479 }
7480
7481 *value = val;
7482 return(name);
7483}
7484
7485/**
7486 * xmlParseStartTag2:
7487 * @ctxt: an XML parser context
7488 *
7489 * parse a start of tag either for rule element or
7490 * EmptyElement. In both case we don't parse the tag closing chars.
7491 * This routine is called when running SAX2 parsing
7492 *
7493 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7494 *
7495 * [ WFC: Unique Att Spec ]
7496 * No attribute name may appear more than once in the same start-tag or
7497 * empty-element tag.
7498 *
7499 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7500 *
7501 * [ WFC: Unique Att Spec ]
7502 * No attribute name may appear more than once in the same start-tag or
7503 * empty-element tag.
7504 *
7505 * With namespace:
7506 *
7507 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7508 *
7509 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7510 *
7511 * Returns the element name parsed
7512 */
7513
7514static const xmlChar *
7515xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007516 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007517 const xmlChar *localname;
7518 const xmlChar *prefix;
7519 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007520 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007521 const xmlChar *nsname;
7522 xmlChar *attvalue;
7523 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007524 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007525 int nratts, nbatts, nbdef;
7526 int i, j, nbNs, attval;
7527 const xmlChar *base;
7528 unsigned long cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007529
7530 if (RAW != '<') return(NULL);
7531 NEXT1;
7532
7533 /*
7534 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
7535 * point since the attribute values may be stored as pointers to
7536 * the buffer and calling SHRINK would destroy them !
7537 * The Shrinking is only possible once the full set of attribute
7538 * callbacks have been done.
7539 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007540reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007541 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007542 base = ctxt->input->base;
7543 cur = ctxt->input->cur - ctxt->input->base;
7544 nbatts = 0;
7545 nratts = 0;
7546 nbdef = 0;
7547 nbNs = 0;
7548 attval = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007549
7550 localname = xmlParseQName(ctxt, &prefix);
7551 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7553 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007554 return(NULL);
7555 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007556 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007557
7558 /*
7559 * Now parse the attributes, it ends up with the ending
7560 *
7561 * (S Attribute)* S?
7562 */
7563 SKIP_BLANKS;
7564 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007565 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007566
7567 while ((RAW != '>') &&
7568 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007569 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007570 const xmlChar *q = CUR_PTR;
7571 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007572 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007573
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00007574 attname = xmlParseAttribute2(ctxt, prefix, localname,
7575 &aprefix, &attvalue, &len, &alloc);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007576 if ((attname != NULL) && (attvalue != NULL)) {
7577 if (len < 0) len = xmlStrlen(attvalue);
7578 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007579 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7580 xmlURIPtr uri;
7581
7582 if (*URL != 0) {
7583 uri = xmlParseURI((const char *) URL);
7584 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007585 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7586 "xmlns: %s not a valid URI\n",
7587 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007588 } else {
7589 if (uri->scheme == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007590 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7591 "xmlns: URI %s is not absolute\n",
7592 URL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007593 }
7594 xmlFreeURI(uri);
7595 }
7596 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007597 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007598 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007599 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007600 for (j = 1;j <= nbNs;j++)
7601 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7602 break;
7603 if (j <= nbNs)
7604 xmlErrAttributeDup(ctxt, NULL, attname);
7605 else
7606 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007607 if (alloc != 0) xmlFree(attvalue);
7608 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007609 continue;
7610 }
7611 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00007612 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
7613 xmlURIPtr uri;
7614
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007615 if (attname == ctxt->str_xml) {
7616 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007617 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
7618 "xml namespace prefix mapped to wrong URI\n",
7619 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007620 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007621 /*
7622 * Do not keep a namespace definition node
7623 */
7624 if (alloc != 0) xmlFree(attvalue);
7625 SKIP_BLANKS;
7626 continue;
7627 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007628 uri = xmlParseURI((const char *) URL);
7629 if (uri == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007630 xmlWarningMsg(ctxt, XML_WAR_NS_URI,
7631 "xmlns:%s: '%s' is not a valid URI\n",
7632 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007633 } else {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00007634 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007635 xmlWarningMsg(ctxt, XML_WAR_NS_URI_RELATIVE,
7636 "xmlns:%s: URI %s is not absolute\n",
7637 attname, URL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007638 }
7639 xmlFreeURI(uri);
7640 }
7641
Daniel Veillard0fb18932003-09-07 09:14:37 +00007642 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007643 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00007644 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007645 for (j = 1;j <= nbNs;j++)
7646 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7647 break;
7648 if (j <= nbNs)
7649 xmlErrAttributeDup(ctxt, aprefix, attname);
7650 else
7651 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007652 if (alloc != 0) xmlFree(attvalue);
7653 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00007654 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007655 continue;
7656 }
7657
7658 /*
7659 * Add the pair to atts
7660 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007661 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7662 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007663 if (attvalue[len] == 0)
7664 xmlFree(attvalue);
7665 goto failed;
7666 }
7667 maxatts = ctxt->maxatts;
7668 atts = ctxt->atts;
7669 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00007670 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007671 atts[nbatts++] = attname;
7672 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007673 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00007674 atts[nbatts++] = attvalue;
7675 attvalue += len;
7676 atts[nbatts++] = attvalue;
7677 /*
7678 * tag if some deallocation is needed
7679 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00007680 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007681 } else {
7682 if ((attvalue != NULL) && (attvalue[len] == 0))
7683 xmlFree(attvalue);
7684 }
7685
7686failed:
7687
7688 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00007689 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007690 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7691 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007692 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007693 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7694 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00007695 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007696 }
7697 SKIP_BLANKS;
7698 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7699 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007700 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007701 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00007702 break;
7703 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007704 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00007705 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00007706 }
7707
Daniel Veillard0fb18932003-09-07 09:14:37 +00007708 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00007709 * The attributes defaulting
7710 */
7711 if (ctxt->attsDefault != NULL) {
7712 xmlDefAttrsPtr defaults;
7713
7714 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
7715 if (defaults != NULL) {
7716 for (i = 0;i < defaults->nbAttrs;i++) {
7717 attname = defaults->values[4 * i];
7718 aprefix = defaults->values[4 * i + 1];
7719
7720 /*
7721 * special work for namespaces defaulted defs
7722 */
7723 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
7724 /*
7725 * check that it's not a defined namespace
7726 */
7727 for (j = 1;j <= nbNs;j++)
7728 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
7729 break;
7730 if (j <= nbNs) continue;
7731
7732 nsname = xmlGetNamespace(ctxt, NULL);
7733 if (nsname != defaults->values[4 * i + 2]) {
7734 if (nsPush(ctxt, NULL,
7735 defaults->values[4 * i + 2]) > 0)
7736 nbNs++;
7737 }
7738 } else if (aprefix == ctxt->str_xmlns) {
7739 /*
7740 * check that it's not a defined namespace
7741 */
7742 for (j = 1;j <= nbNs;j++)
7743 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
7744 break;
7745 if (j <= nbNs) continue;
7746
7747 nsname = xmlGetNamespace(ctxt, attname);
7748 if (nsname != defaults->values[2]) {
7749 if (nsPush(ctxt, attname,
7750 defaults->values[4 * i + 2]) > 0)
7751 nbNs++;
7752 }
7753 } else {
7754 /*
7755 * check that it's not a defined attribute
7756 */
7757 for (j = 0;j < nbatts;j+=5) {
7758 if ((attname == atts[j]) && (aprefix == atts[j+1]))
7759 break;
7760 }
7761 if (j < nbatts) continue;
7762
7763 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
7764 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00007765 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007766 }
7767 maxatts = ctxt->maxatts;
7768 atts = ctxt->atts;
7769 }
7770 atts[nbatts++] = attname;
7771 atts[nbatts++] = aprefix;
7772 if (aprefix == NULL)
7773 atts[nbatts++] = NULL;
7774 else
7775 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
7776 atts[nbatts++] = defaults->values[4 * i + 2];
7777 atts[nbatts++] = defaults->values[4 * i + 3];
7778 nbdef++;
7779 }
7780 }
7781 }
7782 }
7783
Daniel Veillarde70c8772003-11-25 07:21:18 +00007784 /*
7785 * The attributes checkings
7786 */
7787 for (i = 0; i < nbatts;i += 5) {
7788 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
7789 if ((atts[i + 1] != NULL) && (nsname == NULL)) {
7790 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7791 "Namespace prefix %s for %s on %s is not defined\n",
7792 atts[i + 1], atts[i], localname);
7793 }
7794 atts[i + 2] = nsname;
7795 /*
7796 * [ WFC: Unique Att Spec ]
7797 * No attribute name may appear more than once in the same
7798 * start-tag or empty-element tag.
7799 * As extended by the Namespace in XML REC.
7800 */
7801 for (j = 0; j < i;j += 5) {
7802 if (atts[i] == atts[j]) {
7803 if (atts[i+1] == atts[j+1]) {
7804 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
7805 break;
7806 }
7807 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
7808 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
7809 "Namespaced Attribute %s in '%s' redefined\n",
7810 atts[i], nsname, NULL);
7811 break;
7812 }
7813 }
7814 }
7815 }
7816
Daniel Veillarde57ec792003-09-10 10:50:59 +00007817 nsname = xmlGetNamespace(ctxt, prefix);
7818 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007819 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
7820 "Namespace prefix %s on %s is not defined\n",
7821 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007822 }
7823 *pref = prefix;
7824 *URI = nsname;
7825
7826 /*
7827 * SAX: Start of Element !
7828 */
7829 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
7830 (!ctxt->disableSAX)) {
7831 if (nbNs > 0)
7832 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7833 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
7834 nbatts / 5, nbdef, atts);
7835 else
7836 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
7837 nsname, 0, NULL, nbatts / 5, nbdef, atts);
7838 }
7839
7840 /*
7841 * Free up attribute allocated strings if needed
7842 */
7843 if (attval != 0) {
7844 for (i = 3,j = 0; j < nratts;i += 5,j++)
7845 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7846 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007847 }
7848
7849 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00007850
7851base_changed:
7852 /*
7853 * the attribute strings are valid iif the base didn't changed
7854 */
7855 if (attval != 0) {
7856 for (i = 3,j = 0; j < nratts;i += 5,j++)
7857 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
7858 xmlFree((xmlChar *) atts[i]);
7859 }
7860 ctxt->input->cur = ctxt->input->base + cur;
7861 if (ctxt->wellFormed == 1) {
7862 goto reparse;
7863 }
7864 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007865}
7866
7867/**
7868 * xmlParseEndTag2:
7869 * @ctxt: an XML parser context
7870 * @line: line of the start tag
7871 * @nsNr: number of namespaces on the start tag
7872 *
7873 * parse an end of tag
7874 *
7875 * [42] ETag ::= '</' Name S? '>'
7876 *
7877 * With namespace
7878 *
7879 * [NS 9] ETag ::= '</' QName S? '>'
7880 */
7881
7882static void
7883xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007884 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00007885 const xmlChar *name;
7886
7887 GROW;
7888 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007889 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007890 return;
7891 }
7892 SKIP(2);
7893
William M. Brack13dfa872004-09-18 04:52:08 +00007894 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007895 if (ctxt->input->cur[tlen] == '>') {
7896 ctxt->input->cur += tlen + 1;
7897 goto done;
7898 }
7899 ctxt->input->cur += tlen;
7900 name = (xmlChar*)1;
7901 } else {
7902 if (prefix == NULL)
7903 name = xmlParseNameAndCompare(ctxt, ctxt->name);
7904 else
7905 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
7906 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00007907
7908 /*
7909 * We should definitely be at the ending "S? '>'" part
7910 */
7911 GROW;
7912 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007913 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007914 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007915 } else
7916 NEXT1;
7917
7918 /*
7919 * [ WFC: Element Type Match ]
7920 * The Name in an element's end-tag must match the element type in the
7921 * start-tag.
7922 *
7923 */
7924 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007925 if (name == NULL) name = BAD_CAST "unparseable";
7926 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00007927 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007928 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00007929 }
7930
7931 /*
7932 * SAX: End of Tag
7933 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00007934done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00007935 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
7936 (!ctxt->disableSAX))
7937 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
7938
Daniel Veillard0fb18932003-09-07 09:14:37 +00007939 spacePop(ctxt);
7940 if (nsNr != 0)
7941 nsPop(ctxt, nsNr);
7942 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007943}
7944
7945/**
Owen Taylor3473f882001-02-23 17:55:21 +00007946 * xmlParseCDSect:
7947 * @ctxt: an XML parser context
7948 *
7949 * Parse escaped pure raw content.
7950 *
7951 * [18] CDSect ::= CDStart CData CDEnd
7952 *
7953 * [19] CDStart ::= '<![CDATA['
7954 *
7955 * [20] Data ::= (Char* - (Char* ']]>' Char*))
7956 *
7957 * [21] CDEnd ::= ']]>'
7958 */
7959void
7960xmlParseCDSect(xmlParserCtxtPtr ctxt) {
7961 xmlChar *buf = NULL;
7962 int len = 0;
7963 int size = XML_PARSER_BUFFER_SIZE;
7964 int r, rl;
7965 int s, sl;
7966 int cur, l;
7967 int count = 0;
7968
Daniel Veillard8f597c32003-10-06 08:19:27 +00007969 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007970 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007971 SKIP(9);
7972 } else
7973 return;
7974
7975 ctxt->instate = XML_PARSER_CDATA_SECTION;
7976 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00007977 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007978 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007979 ctxt->instate = XML_PARSER_CONTENT;
7980 return;
7981 }
7982 NEXTL(rl);
7983 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00007984 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007985 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007986 ctxt->instate = XML_PARSER_CONTENT;
7987 return;
7988 }
7989 NEXTL(sl);
7990 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00007991 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00007992 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007993 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007994 return;
7995 }
William M. Brack871611b2003-10-18 04:53:14 +00007996 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00007997 ((r != ']') || (s != ']') || (cur != '>'))) {
7998 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00007999 xmlChar *tmp;
8000
Owen Taylor3473f882001-02-23 17:55:21 +00008001 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008002 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8003 if (tmp == NULL) {
8004 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008006 return;
8007 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008008 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008009 }
8010 COPY_BUF(rl,buf,len,r);
8011 r = s;
8012 rl = sl;
8013 s = cur;
8014 sl = l;
8015 count++;
8016 if (count > 50) {
8017 GROW;
8018 count = 0;
8019 }
8020 NEXTL(l);
8021 cur = CUR_CHAR(l);
8022 }
8023 buf[len] = 0;
8024 ctxt->instate = XML_PARSER_CONTENT;
8025 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008026 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00008027 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008028 xmlFree(buf);
8029 return;
8030 }
8031 NEXTL(l);
8032
8033 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008034 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00008035 */
8036 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8037 if (ctxt->sax->cdataBlock != NULL)
8038 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00008039 else if (ctxt->sax->characters != NULL)
8040 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00008041 }
8042 xmlFree(buf);
8043}
8044
8045/**
8046 * xmlParseContent:
8047 * @ctxt: an XML parser context
8048 *
8049 * Parse a content:
8050 *
8051 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8052 */
8053
8054void
8055xmlParseContent(xmlParserCtxtPtr ctxt) {
8056 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00008057 while ((RAW != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00008058 ((RAW != '<') || (NXT(1) != '/'))) {
8059 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008060 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00008061 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00008062
8063 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008064 * First case : a Processing Instruction.
8065 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00008066 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008067 xmlParsePI(ctxt);
8068 }
8069
8070 /*
8071 * Second case : a CDSection
8072 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00008073 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00008074 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008075 xmlParseCDSect(ctxt);
8076 }
8077
8078 /*
8079 * Third case : a comment
8080 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008081 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00008082 (NXT(2) == '-') && (NXT(3) == '-')) {
8083 xmlParseComment(ctxt);
8084 ctxt->instate = XML_PARSER_CONTENT;
8085 }
8086
8087 /*
8088 * Fourth case : a sub-element.
8089 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00008090 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00008091 xmlParseElement(ctxt);
8092 }
8093
8094 /*
8095 * Fifth case : a reference. If if has not been resolved,
8096 * parsing returns it's Name, create the node
8097 */
8098
Daniel Veillard21a0f912001-02-25 19:54:14 +00008099 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00008100 xmlParseReference(ctxt);
8101 }
8102
8103 /*
8104 * Last case, text. Note that References are handled directly.
8105 */
8106 else {
8107 xmlParseCharData(ctxt, 0);
8108 }
8109
8110 GROW;
8111 /*
8112 * Pop-up of finished entities.
8113 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00008114 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00008115 xmlPopInput(ctxt);
8116 SHRINK;
8117
Daniel Veillardfdc91562002-07-01 21:52:03 +00008118 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008119 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8120 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008121 ctxt->instate = XML_PARSER_EOF;
8122 break;
8123 }
8124 }
8125}
8126
8127/**
8128 * xmlParseElement:
8129 * @ctxt: an XML parser context
8130 *
8131 * parse an XML element, this is highly recursive
8132 *
8133 * [39] element ::= EmptyElemTag | STag content ETag
8134 *
8135 * [ WFC: Element Type Match ]
8136 * The Name in an element's end-tag must match the element type in the
8137 * start-tag.
8138 *
Owen Taylor3473f882001-02-23 17:55:21 +00008139 */
8140
8141void
8142xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008143 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008144 const xmlChar *prefix;
8145 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00008146 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008147 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00008148 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008149 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008150
8151 /* Capture start position */
8152 if (ctxt->record_info) {
8153 node_info.begin_pos = ctxt->input->consumed +
8154 (CUR_PTR - ctxt->input->base);
8155 node_info.begin_line = ctxt->input->line;
8156 }
8157
8158 if (ctxt->spaceNr == 0)
8159 spacePush(ctxt, -1);
8160 else
8161 spacePush(ctxt, *ctxt->space);
8162
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008163 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00008164#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008165 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00008166#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008167 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00008168#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008169 else
8170 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008171#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008172 if (name == NULL) {
8173 spacePop(ctxt);
8174 return;
8175 }
8176 namePush(ctxt, name);
8177 ret = ctxt->node;
8178
Daniel Veillard4432df22003-09-28 18:58:27 +00008179#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008180 /*
8181 * [ VC: Root Element Type ]
8182 * The Name in the document type declaration must match the element
8183 * type of the root element.
8184 */
8185 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8186 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8187 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00008188#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008189
8190 /*
8191 * Check for an Empty Element.
8192 */
8193 if ((RAW == '/') && (NXT(1) == '>')) {
8194 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008195 if (ctxt->sax2) {
8196 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
8197 (!ctxt->disableSAX))
8198 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00008199#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00008200 } else {
8201 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8202 (!ctxt->disableSAX))
8203 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00008204#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008205 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008206 namePop(ctxt);
8207 spacePop(ctxt);
8208 if (nsNr != ctxt->nsNr)
8209 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008210 if ( ret != NULL && ctxt->record_info ) {
8211 node_info.end_pos = ctxt->input->consumed +
8212 (CUR_PTR - ctxt->input->base);
8213 node_info.end_line = ctxt->input->line;
8214 node_info.node = ret;
8215 xmlParserAddNodeInfo(ctxt, &node_info);
8216 }
8217 return;
8218 }
8219 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00008220 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008221 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008222 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
8223 "Couldn't find end of Start Tag %s line %d\n",
8224 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008225
8226 /*
8227 * end of parsing of this node.
8228 */
8229 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008230 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008231 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008232 if (nsNr != ctxt->nsNr)
8233 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008234
8235 /*
8236 * Capture end position and add node
8237 */
8238 if ( ret != NULL && ctxt->record_info ) {
8239 node_info.end_pos = ctxt->input->consumed +
8240 (CUR_PTR - ctxt->input->base);
8241 node_info.end_line = ctxt->input->line;
8242 node_info.node = ret;
8243 xmlParserAddNodeInfo(ctxt, &node_info);
8244 }
8245 return;
8246 }
8247
8248 /*
8249 * Parse the content of the element:
8250 */
8251 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00008252 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008253 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00008254 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008255 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008256
8257 /*
8258 * end of parsing of this node.
8259 */
8260 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008261 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008262 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008263 if (nsNr != ctxt->nsNr)
8264 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00008265 return;
8266 }
8267
8268 /*
8269 * parse the end of tag: '</' should be here.
8270 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008271 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008272 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008273 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00008274 }
8275#ifdef LIBXML_SAX1_ENABLED
8276 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00008277 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00008278#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00008279
8280 /*
8281 * Capture end position and add node
8282 */
8283 if ( ret != NULL && ctxt->record_info ) {
8284 node_info.end_pos = ctxt->input->consumed +
8285 (CUR_PTR - ctxt->input->base);
8286 node_info.end_line = ctxt->input->line;
8287 node_info.node = ret;
8288 xmlParserAddNodeInfo(ctxt, &node_info);
8289 }
8290}
8291
8292/**
8293 * xmlParseVersionNum:
8294 * @ctxt: an XML parser context
8295 *
8296 * parse the XML version value.
8297 *
8298 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
8299 *
8300 * Returns the string giving the XML version number, or NULL
8301 */
8302xmlChar *
8303xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
8304 xmlChar *buf = NULL;
8305 int len = 0;
8306 int size = 10;
8307 xmlChar cur;
8308
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008309 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008310 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008312 return(NULL);
8313 }
8314 cur = CUR;
8315 while (((cur >= 'a') && (cur <= 'z')) ||
8316 ((cur >= 'A') && (cur <= 'Z')) ||
8317 ((cur >= '0') && (cur <= '9')) ||
8318 (cur == '_') || (cur == '.') ||
8319 (cur == ':') || (cur == '-')) {
8320 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008321 xmlChar *tmp;
8322
Owen Taylor3473f882001-02-23 17:55:21 +00008323 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008324 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8325 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008326 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008327 return(NULL);
8328 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008329 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008330 }
8331 buf[len++] = cur;
8332 NEXT;
8333 cur=CUR;
8334 }
8335 buf[len] = 0;
8336 return(buf);
8337}
8338
8339/**
8340 * xmlParseVersionInfo:
8341 * @ctxt: an XML parser context
8342 *
8343 * parse the XML version.
8344 *
8345 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8346 *
8347 * [25] Eq ::= S? '=' S?
8348 *
8349 * Returns the version string, e.g. "1.0"
8350 */
8351
8352xmlChar *
8353xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
8354 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008355
Daniel Veillarda07050d2003-10-19 14:46:32 +00008356 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008357 SKIP(7);
8358 SKIP_BLANKS;
8359 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008360 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008361 return(NULL);
8362 }
8363 NEXT;
8364 SKIP_BLANKS;
8365 if (RAW == '"') {
8366 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008367 version = xmlParseVersionNum(ctxt);
8368 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008369 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008370 } else
8371 NEXT;
8372 } else if (RAW == '\''){
8373 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008374 version = xmlParseVersionNum(ctxt);
8375 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008376 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008377 } else
8378 NEXT;
8379 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008380 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008381 }
8382 }
8383 return(version);
8384}
8385
8386/**
8387 * xmlParseEncName:
8388 * @ctxt: an XML parser context
8389 *
8390 * parse the XML encoding name
8391 *
8392 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
8393 *
8394 * Returns the encoding name value or NULL
8395 */
8396xmlChar *
8397xmlParseEncName(xmlParserCtxtPtr ctxt) {
8398 xmlChar *buf = NULL;
8399 int len = 0;
8400 int size = 10;
8401 xmlChar cur;
8402
8403 cur = CUR;
8404 if (((cur >= 'a') && (cur <= 'z')) ||
8405 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00008406 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00008407 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008408 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008409 return(NULL);
8410 }
8411
8412 buf[len++] = cur;
8413 NEXT;
8414 cur = CUR;
8415 while (((cur >= 'a') && (cur <= 'z')) ||
8416 ((cur >= 'A') && (cur <= 'Z')) ||
8417 ((cur >= '0') && (cur <= '9')) ||
8418 (cur == '.') || (cur == '_') ||
8419 (cur == '-')) {
8420 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00008421 xmlChar *tmp;
8422
Owen Taylor3473f882001-02-23 17:55:21 +00008423 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00008424 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
8425 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008426 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00008427 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00008428 return(NULL);
8429 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00008430 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00008431 }
8432 buf[len++] = cur;
8433 NEXT;
8434 cur = CUR;
8435 if (cur == 0) {
8436 SHRINK;
8437 GROW;
8438 cur = CUR;
8439 }
8440 }
8441 buf[len] = 0;
8442 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008443 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008444 }
8445 return(buf);
8446}
8447
8448/**
8449 * xmlParseEncodingDecl:
8450 * @ctxt: an XML parser context
8451 *
8452 * parse the XML encoding declaration
8453 *
8454 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
8455 *
8456 * this setups the conversion filters.
8457 *
8458 * Returns the encoding value or NULL
8459 */
8460
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008461const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008462xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
8463 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008464
8465 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008466 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008467 SKIP(8);
8468 SKIP_BLANKS;
8469 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008470 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008471 return(NULL);
8472 }
8473 NEXT;
8474 SKIP_BLANKS;
8475 if (RAW == '"') {
8476 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008477 encoding = xmlParseEncName(ctxt);
8478 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008479 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 } else
8481 NEXT;
8482 } else if (RAW == '\''){
8483 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00008484 encoding = xmlParseEncName(ctxt);
8485 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008486 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008487 } else
8488 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00008489 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008490 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008491 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00008492 /*
8493 * UTF-16 encoding stwich has already taken place at this stage,
8494 * more over the little-endian/big-endian selection is already done
8495 */
8496 if ((encoding != NULL) &&
8497 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
8498 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008499 if (ctxt->encoding != NULL)
8500 xmlFree((xmlChar *) ctxt->encoding);
8501 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00008502 }
8503 /*
8504 * UTF-8 encoding is handled natively
8505 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008506 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00008507 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
8508 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008509 if (ctxt->encoding != NULL)
8510 xmlFree((xmlChar *) ctxt->encoding);
8511 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00008512 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00008513 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00008514 xmlCharEncodingHandlerPtr handler;
8515
8516 if (ctxt->input->encoding != NULL)
8517 xmlFree((xmlChar *) ctxt->input->encoding);
8518 ctxt->input->encoding = encoding;
8519
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008520 handler = xmlFindCharEncodingHandler((const char *) encoding);
8521 if (handler != NULL) {
8522 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00008523 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00008524 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00008525 "Unsupported encoding %s\n", encoding);
8526 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008527 }
8528 }
8529 }
8530 return(encoding);
8531}
8532
8533/**
8534 * xmlParseSDDecl:
8535 * @ctxt: an XML parser context
8536 *
8537 * parse the XML standalone declaration
8538 *
8539 * [32] SDDecl ::= S 'standalone' Eq
8540 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
8541 *
8542 * [ VC: Standalone Document Declaration ]
8543 * TODO The standalone document declaration must have the value "no"
8544 * if any external markup declarations contain declarations of:
8545 * - attributes with default values, if elements to which these
8546 * attributes apply appear in the document without specifications
8547 * of values for these attributes, or
8548 * - entities (other than amp, lt, gt, apos, quot), if references
8549 * to those entities appear in the document, or
8550 * - attributes with values subject to normalization, where the
8551 * attribute appears in the document with a value which will change
8552 * as a result of normalization, or
8553 * - element types with element content, if white space occurs directly
8554 * within any instance of those types.
8555 *
8556 * Returns 1 if standalone, 0 otherwise
8557 */
8558
8559int
8560xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
8561 int standalone = -1;
8562
8563 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008564 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008565 SKIP(10);
8566 SKIP_BLANKS;
8567 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008568 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008569 return(standalone);
8570 }
8571 NEXT;
8572 SKIP_BLANKS;
8573 if (RAW == '\''){
8574 NEXT;
8575 if ((RAW == 'n') && (NXT(1) == 'o')) {
8576 standalone = 0;
8577 SKIP(2);
8578 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8579 (NXT(2) == 's')) {
8580 standalone = 1;
8581 SKIP(3);
8582 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008583 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008584 }
8585 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008586 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008587 } else
8588 NEXT;
8589 } else if (RAW == '"'){
8590 NEXT;
8591 if ((RAW == 'n') && (NXT(1) == 'o')) {
8592 standalone = 0;
8593 SKIP(2);
8594 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
8595 (NXT(2) == 's')) {
8596 standalone = 1;
8597 SKIP(3);
8598 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008599 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008600 }
8601 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008602 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008603 } else
8604 NEXT;
8605 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008606 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008607 }
8608 }
8609 return(standalone);
8610}
8611
8612/**
8613 * xmlParseXMLDecl:
8614 * @ctxt: an XML parser context
8615 *
8616 * parse an XML declaration header
8617 *
8618 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
8619 */
8620
8621void
8622xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
8623 xmlChar *version;
8624
8625 /*
8626 * We know that '<?xml' is here.
8627 */
8628 SKIP(5);
8629
William M. Brack76e95df2003-10-18 16:20:14 +00008630 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008631 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8632 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008633 }
8634 SKIP_BLANKS;
8635
8636 /*
Daniel Veillard19840942001-11-29 16:11:38 +00008637 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00008638 */
8639 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00008640 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008641 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008642 } else {
8643 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
8644 /*
8645 * TODO: Blueberry should be detected here
8646 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00008647 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
8648 "Unsupported version '%s'\n",
8649 version, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00008650 }
8651 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00008652 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00008653 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00008654 }
Owen Taylor3473f882001-02-23 17:55:21 +00008655
8656 /*
8657 * We may have the encoding declaration
8658 */
William M. Brack76e95df2003-10-18 16:20:14 +00008659 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008660 if ((RAW == '?') && (NXT(1) == '>')) {
8661 SKIP(2);
8662 return;
8663 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008664 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008665 }
8666 xmlParseEncodingDecl(ctxt);
8667 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8668 /*
8669 * The XML REC instructs us to stop parsing right here
8670 */
8671 return;
8672 }
8673
8674 /*
8675 * We may have the standalone status.
8676 */
William M. Brack76e95df2003-10-18 16:20:14 +00008677 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008678 if ((RAW == '?') && (NXT(1) == '>')) {
8679 SKIP(2);
8680 return;
8681 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008682 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008683 }
8684 SKIP_BLANKS;
8685 ctxt->input->standalone = xmlParseSDDecl(ctxt);
8686
8687 SKIP_BLANKS;
8688 if ((RAW == '?') && (NXT(1) == '>')) {
8689 SKIP(2);
8690 } else if (RAW == '>') {
8691 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008692 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008693 NEXT;
8694 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008695 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 MOVETO_ENDTAG(CUR_PTR);
8697 NEXT;
8698 }
8699}
8700
8701/**
8702 * xmlParseMisc:
8703 * @ctxt: an XML parser context
8704 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008705 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00008706 *
8707 * [27] Misc ::= Comment | PI | S
8708 */
8709
8710void
8711xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008712 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00008713 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00008714 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00008715 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008716 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00008717 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008718 NEXT;
8719 } else
8720 xmlParseComment(ctxt);
8721 }
8722}
8723
8724/**
8725 * xmlParseDocument:
8726 * @ctxt: an XML parser context
8727 *
8728 * parse an XML document (and build a tree if using the standard SAX
8729 * interface).
8730 *
8731 * [1] document ::= prolog element Misc*
8732 *
8733 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
8734 *
8735 * Returns 0, -1 in case of error. the parser context is augmented
8736 * as a result of the parsing.
8737 */
8738
8739int
8740xmlParseDocument(xmlParserCtxtPtr ctxt) {
8741 xmlChar start[4];
8742 xmlCharEncoding enc;
8743
8744 xmlInitParser();
8745
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008746 if ((ctxt == NULL) || (ctxt->input == NULL))
8747 return(-1);
8748
Owen Taylor3473f882001-02-23 17:55:21 +00008749 GROW;
8750
8751 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752 * SAX: detecting the level.
8753 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008754 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008755
8756 /*
Owen Taylor3473f882001-02-23 17:55:21 +00008757 * SAX: beginning of the document processing.
8758 */
8759 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8760 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8761
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008762 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
8763 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00008764 /*
8765 * Get the 4 first bytes and decode the charset
8766 * if enc != XML_CHAR_ENCODING_NONE
8767 * plug some encoding conversion routines.
8768 */
8769 start[0] = RAW;
8770 start[1] = NXT(1);
8771 start[2] = NXT(2);
8772 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008773 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00008774 if (enc != XML_CHAR_ENCODING_NONE) {
8775 xmlSwitchEncoding(ctxt, enc);
8776 }
Owen Taylor3473f882001-02-23 17:55:21 +00008777 }
8778
8779
8780 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008781 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008782 }
8783
8784 /*
8785 * Check for the XMLDecl in the Prolog.
8786 */
8787 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008788 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008789
8790 /*
8791 * Note that we will switch encoding on the fly.
8792 */
8793 xmlParseXMLDecl(ctxt);
8794 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8795 /*
8796 * The XML REC instructs us to stop parsing right here
8797 */
8798 return(-1);
8799 }
8800 ctxt->standalone = ctxt->input->standalone;
8801 SKIP_BLANKS;
8802 } else {
8803 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8804 }
8805 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8806 ctxt->sax->startDocument(ctxt->userData);
8807
8808 /*
8809 * The Misc part of the Prolog
8810 */
8811 GROW;
8812 xmlParseMisc(ctxt);
8813
8814 /*
8815 * Then possibly doc type declaration(s) and more Misc
8816 * (doctypedecl Misc*)?
8817 */
8818 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008819 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00008820
8821 ctxt->inSubset = 1;
8822 xmlParseDocTypeDecl(ctxt);
8823 if (RAW == '[') {
8824 ctxt->instate = XML_PARSER_DTD;
8825 xmlParseInternalSubset(ctxt);
8826 }
8827
8828 /*
8829 * Create and update the external subset.
8830 */
8831 ctxt->inSubset = 2;
8832 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
8833 (!ctxt->disableSAX))
8834 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8835 ctxt->extSubSystem, ctxt->extSubURI);
8836 ctxt->inSubset = 0;
8837
8838
8839 ctxt->instate = XML_PARSER_PROLOG;
8840 xmlParseMisc(ctxt);
8841 }
8842
8843 /*
8844 * Time to start parsing the tree itself
8845 */
8846 GROW;
8847 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008848 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
8849 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008850 } else {
8851 ctxt->instate = XML_PARSER_CONTENT;
8852 xmlParseElement(ctxt);
8853 ctxt->instate = XML_PARSER_EPILOG;
8854
8855
8856 /*
8857 * The Misc part at the end
8858 */
8859 xmlParseMisc(ctxt);
8860
Daniel Veillard561b7f82002-03-20 21:55:57 +00008861 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008862 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008863 }
8864 ctxt->instate = XML_PARSER_EOF;
8865 }
8866
8867 /*
8868 * SAX: end of the document processing.
8869 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008870 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008871 ctxt->sax->endDocument(ctxt->userData);
8872
Daniel Veillard5997aca2002-03-18 18:36:20 +00008873 /*
8874 * Remove locally kept entity definitions if the tree was not built
8875 */
8876 if ((ctxt->myDoc != NULL) &&
8877 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
8878 xmlFreeDoc(ctxt->myDoc);
8879 ctxt->myDoc = NULL;
8880 }
8881
Daniel Veillardc7612992002-02-17 22:47:37 +00008882 if (! ctxt->wellFormed) {
8883 ctxt->valid = 0;
8884 return(-1);
8885 }
Owen Taylor3473f882001-02-23 17:55:21 +00008886 return(0);
8887}
8888
8889/**
8890 * xmlParseExtParsedEnt:
8891 * @ctxt: an XML parser context
8892 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008893 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00008894 * An external general parsed entity is well-formed if it matches the
8895 * production labeled extParsedEnt.
8896 *
8897 * [78] extParsedEnt ::= TextDecl? content
8898 *
8899 * Returns 0, -1 in case of error. the parser context is augmented
8900 * as a result of the parsing.
8901 */
8902
8903int
8904xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
8905 xmlChar start[4];
8906 xmlCharEncoding enc;
8907
Daniel Veillard36e5cd52004-11-02 14:52:23 +00008908 if ((ctxt == NULL) || (ctxt->input == NULL))
8909 return(-1);
8910
Owen Taylor3473f882001-02-23 17:55:21 +00008911 xmlDefaultSAXHandlerInit();
8912
Daniel Veillard309f81d2003-09-23 09:02:53 +00008913 xmlDetectSAX2(ctxt);
8914
Owen Taylor3473f882001-02-23 17:55:21 +00008915 GROW;
8916
8917 /*
8918 * SAX: beginning of the document processing.
8919 */
8920 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8921 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
8922
8923 /*
8924 * Get the 4 first bytes and decode the charset
8925 * if enc != XML_CHAR_ENCODING_NONE
8926 * plug some encoding conversion routines.
8927 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00008928 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
8929 start[0] = RAW;
8930 start[1] = NXT(1);
8931 start[2] = NXT(2);
8932 start[3] = NXT(3);
8933 enc = xmlDetectCharEncoding(start, 4);
8934 if (enc != XML_CHAR_ENCODING_NONE) {
8935 xmlSwitchEncoding(ctxt, enc);
8936 }
Owen Taylor3473f882001-02-23 17:55:21 +00008937 }
8938
8939
8940 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008942 }
8943
8944 /*
8945 * Check for the XMLDecl in the Prolog.
8946 */
8947 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00008948 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008949
8950 /*
8951 * Note that we will switch encoding on the fly.
8952 */
8953 xmlParseXMLDecl(ctxt);
8954 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8955 /*
8956 * The XML REC instructs us to stop parsing right here
8957 */
8958 return(-1);
8959 }
8960 SKIP_BLANKS;
8961 } else {
8962 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8963 }
8964 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
8965 ctxt->sax->startDocument(ctxt->userData);
8966
8967 /*
8968 * Doing validity checking on chunk doesn't make sense
8969 */
8970 ctxt->instate = XML_PARSER_CONTENT;
8971 ctxt->validate = 0;
8972 ctxt->loadsubset = 0;
8973 ctxt->depth = 0;
8974
8975 xmlParseContent(ctxt);
8976
8977 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008978 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008979 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008980 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008981 }
8982
8983 /*
8984 * SAX: end of the document processing.
8985 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008986 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008987 ctxt->sax->endDocument(ctxt->userData);
8988
8989 if (! ctxt->wellFormed) return(-1);
8990 return(0);
8991}
8992
Daniel Veillard73b013f2003-09-30 12:36:01 +00008993#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008994/************************************************************************
8995 * *
8996 * Progressive parsing interfaces *
8997 * *
8998 ************************************************************************/
8999
9000/**
9001 * xmlParseLookupSequence:
9002 * @ctxt: an XML parser context
9003 * @first: the first char to lookup
9004 * @next: the next char to lookup or zero
9005 * @third: the next char to lookup or zero
9006 *
9007 * Try to find if a sequence (first, next, third) or just (first next) or
9008 * (first) is available in the input stream.
9009 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9010 * to avoid rescanning sequences of bytes, it DOES change the state of the
9011 * parser, do not use liberally.
9012 *
9013 * Returns the index to the current parsing point if the full sequence
9014 * is available, -1 otherwise.
9015 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009016static int
Owen Taylor3473f882001-02-23 17:55:21 +00009017xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9018 xmlChar next, xmlChar third) {
9019 int base, len;
9020 xmlParserInputPtr in;
9021 const xmlChar *buf;
9022
9023 in = ctxt->input;
9024 if (in == NULL) return(-1);
9025 base = in->cur - in->base;
9026 if (base < 0) return(-1);
9027 if (ctxt->checkIndex > base)
9028 base = ctxt->checkIndex;
9029 if (in->buf == NULL) {
9030 buf = in->base;
9031 len = in->length;
9032 } else {
9033 buf = in->buf->buffer->content;
9034 len = in->buf->buffer->use;
9035 }
9036 /* take into account the sequence length */
9037 if (third) len -= 2;
9038 else if (next) len --;
9039 for (;base < len;base++) {
9040 if (buf[base] == first) {
9041 if (third != 0) {
9042 if ((buf[base + 1] != next) ||
9043 (buf[base + 2] != third)) continue;
9044 } else if (next != 0) {
9045 if (buf[base + 1] != next) continue;
9046 }
9047 ctxt->checkIndex = 0;
9048#ifdef DEBUG_PUSH
9049 if (next == 0)
9050 xmlGenericError(xmlGenericErrorContext,
9051 "PP: lookup '%c' found at %d\n",
9052 first, base);
9053 else if (third == 0)
9054 xmlGenericError(xmlGenericErrorContext,
9055 "PP: lookup '%c%c' found at %d\n",
9056 first, next, base);
9057 else
9058 xmlGenericError(xmlGenericErrorContext,
9059 "PP: lookup '%c%c%c' found at %d\n",
9060 first, next, third, base);
9061#endif
9062 return(base - (in->cur - in->base));
9063 }
9064 }
9065 ctxt->checkIndex = base;
9066#ifdef DEBUG_PUSH
9067 if (next == 0)
9068 xmlGenericError(xmlGenericErrorContext,
9069 "PP: lookup '%c' failed\n", first);
9070 else if (third == 0)
9071 xmlGenericError(xmlGenericErrorContext,
9072 "PP: lookup '%c%c' failed\n", first, next);
9073 else
9074 xmlGenericError(xmlGenericErrorContext,
9075 "PP: lookup '%c%c%c' failed\n", first, next, third);
9076#endif
9077 return(-1);
9078}
9079
9080/**
Daniel Veillarda880b122003-04-21 21:36:41 +00009081 * xmlParseGetLasts:
9082 * @ctxt: an XML parser context
9083 * @lastlt: pointer to store the last '<' from the input
9084 * @lastgt: pointer to store the last '>' from the input
9085 *
9086 * Lookup the last < and > in the current chunk
9087 */
9088static void
9089xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
9090 const xmlChar **lastgt) {
9091 const xmlChar *tmp;
9092
9093 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
9094 xmlGenericError(xmlGenericErrorContext,
9095 "Internal error: xmlParseGetLasts\n");
9096 return;
9097 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009098 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009099 tmp = ctxt->input->end;
9100 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +00009101 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +00009102 if (tmp < ctxt->input->base) {
9103 *lastlt = NULL;
9104 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +00009105 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009106 *lastlt = tmp;
9107 tmp++;
9108 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
9109 if (*tmp == '\'') {
9110 tmp++;
9111 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
9112 if (tmp < ctxt->input->end) tmp++;
9113 } else if (*tmp == '"') {
9114 tmp++;
9115 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
9116 if (tmp < ctxt->input->end) tmp++;
9117 } else
9118 tmp++;
9119 }
9120 if (tmp < ctxt->input->end)
9121 *lastgt = tmp;
9122 else {
9123 tmp = *lastlt;
9124 tmp--;
9125 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
9126 if (tmp >= ctxt->input->base)
9127 *lastgt = tmp;
9128 else
9129 *lastgt = NULL;
9130 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009131 }
Daniel Veillarda880b122003-04-21 21:36:41 +00009132 } else {
9133 *lastlt = NULL;
9134 *lastgt = NULL;
9135 }
9136}
9137/**
Owen Taylor3473f882001-02-23 17:55:21 +00009138 * xmlParseTryOrFinish:
9139 * @ctxt: an XML parser context
9140 * @terminate: last chunk indicator
9141 *
9142 * Try to progress on parsing
9143 *
9144 * Returns zero if no parsing was possible
9145 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00009146static int
Owen Taylor3473f882001-02-23 17:55:21 +00009147xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
9148 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009149 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009150 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +00009151 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +00009152
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009153 if (ctxt->input == NULL)
9154 return(0);
9155
Owen Taylor3473f882001-02-23 17:55:21 +00009156#ifdef DEBUG_PUSH
9157 switch (ctxt->instate) {
9158 case XML_PARSER_EOF:
9159 xmlGenericError(xmlGenericErrorContext,
9160 "PP: try EOF\n"); break;
9161 case XML_PARSER_START:
9162 xmlGenericError(xmlGenericErrorContext,
9163 "PP: try START\n"); break;
9164 case XML_PARSER_MISC:
9165 xmlGenericError(xmlGenericErrorContext,
9166 "PP: try MISC\n");break;
9167 case XML_PARSER_COMMENT:
9168 xmlGenericError(xmlGenericErrorContext,
9169 "PP: try COMMENT\n");break;
9170 case XML_PARSER_PROLOG:
9171 xmlGenericError(xmlGenericErrorContext,
9172 "PP: try PROLOG\n");break;
9173 case XML_PARSER_START_TAG:
9174 xmlGenericError(xmlGenericErrorContext,
9175 "PP: try START_TAG\n");break;
9176 case XML_PARSER_CONTENT:
9177 xmlGenericError(xmlGenericErrorContext,
9178 "PP: try CONTENT\n");break;
9179 case XML_PARSER_CDATA_SECTION:
9180 xmlGenericError(xmlGenericErrorContext,
9181 "PP: try CDATA_SECTION\n");break;
9182 case XML_PARSER_END_TAG:
9183 xmlGenericError(xmlGenericErrorContext,
9184 "PP: try END_TAG\n");break;
9185 case XML_PARSER_ENTITY_DECL:
9186 xmlGenericError(xmlGenericErrorContext,
9187 "PP: try ENTITY_DECL\n");break;
9188 case XML_PARSER_ENTITY_VALUE:
9189 xmlGenericError(xmlGenericErrorContext,
9190 "PP: try ENTITY_VALUE\n");break;
9191 case XML_PARSER_ATTRIBUTE_VALUE:
9192 xmlGenericError(xmlGenericErrorContext,
9193 "PP: try ATTRIBUTE_VALUE\n");break;
9194 case XML_PARSER_DTD:
9195 xmlGenericError(xmlGenericErrorContext,
9196 "PP: try DTD\n");break;
9197 case XML_PARSER_EPILOG:
9198 xmlGenericError(xmlGenericErrorContext,
9199 "PP: try EPILOG\n");break;
9200 case XML_PARSER_PI:
9201 xmlGenericError(xmlGenericErrorContext,
9202 "PP: try PI\n");break;
9203 case XML_PARSER_IGNORE:
9204 xmlGenericError(xmlGenericErrorContext,
9205 "PP: try IGNORE\n");break;
9206 }
9207#endif
9208
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009209 if ((ctxt->input != NULL) &&
9210 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009211 xmlSHRINK(ctxt);
9212 ctxt->checkIndex = 0;
9213 }
9214 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +00009215
Daniel Veillarda880b122003-04-21 21:36:41 +00009216 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +00009217 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00009218 return(0);
9219
9220
Owen Taylor3473f882001-02-23 17:55:21 +00009221 /*
9222 * Pop-up of finished entities.
9223 */
9224 while ((RAW == 0) && (ctxt->inputNr > 1))
9225 xmlPopInput(ctxt);
9226
Daniel Veillard198c1bf2003-10-20 17:07:41 +00009227 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +00009228 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009229 avail = ctxt->input->length -
9230 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00009231 else {
9232 /*
9233 * If we are operating on converted input, try to flush
9234 * remainng chars to avoid them stalling in the non-converted
9235 * buffer.
9236 */
9237 if ((ctxt->input->buf->raw != NULL) &&
9238 (ctxt->input->buf->raw->use > 0)) {
9239 int base = ctxt->input->base -
9240 ctxt->input->buf->buffer->content;
9241 int current = ctxt->input->cur - ctxt->input->base;
9242
9243 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
9244 ctxt->input->base = ctxt->input->buf->buffer->content + base;
9245 ctxt->input->cur = ctxt->input->base + current;
9246 ctxt->input->end =
9247 &ctxt->input->buf->buffer->content[
9248 ctxt->input->buf->buffer->use];
9249 }
9250 avail = ctxt->input->buf->buffer->use -
9251 (ctxt->input->cur - ctxt->input->base);
9252 }
Owen Taylor3473f882001-02-23 17:55:21 +00009253 if (avail < 1)
9254 goto done;
9255 switch (ctxt->instate) {
9256 case XML_PARSER_EOF:
9257 /*
9258 * Document parsing is done !
9259 */
9260 goto done;
9261 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009262 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
9263 xmlChar start[4];
9264 xmlCharEncoding enc;
9265
9266 /*
9267 * Very first chars read from the document flow.
9268 */
9269 if (avail < 4)
9270 goto done;
9271
9272 /*
9273 * Get the 4 first bytes and decode the charset
9274 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +00009275 * plug some encoding conversion routines,
9276 * else xmlSwitchEncoding will set to (default)
9277 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009278 */
9279 start[0] = RAW;
9280 start[1] = NXT(1);
9281 start[2] = NXT(2);
9282 start[3] = NXT(3);
9283 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +00009284 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +00009285 break;
9286 }
Owen Taylor3473f882001-02-23 17:55:21 +00009287
Daniel Veillard2b8c4a12003-10-02 22:28:19 +00009288 if (avail < 2)
9289 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00009290 cur = ctxt->input->cur[0];
9291 next = ctxt->input->cur[1];
9292 if (cur == 0) {
9293 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9294 ctxt->sax->setDocumentLocator(ctxt->userData,
9295 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009297 ctxt->instate = XML_PARSER_EOF;
9298#ifdef DEBUG_PUSH
9299 xmlGenericError(xmlGenericErrorContext,
9300 "PP: entering EOF\n");
9301#endif
9302 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9303 ctxt->sax->endDocument(ctxt->userData);
9304 goto done;
9305 }
9306 if ((cur == '<') && (next == '?')) {
9307 /* PI or XML decl */
9308 if (avail < 5) return(ret);
9309 if ((!terminate) &&
9310 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9311 return(ret);
9312 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9313 ctxt->sax->setDocumentLocator(ctxt->userData,
9314 &xmlDefaultSAXLocator);
9315 if ((ctxt->input->cur[2] == 'x') &&
9316 (ctxt->input->cur[3] == 'm') &&
9317 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +00009318 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009319 ret += 5;
9320#ifdef DEBUG_PUSH
9321 xmlGenericError(xmlGenericErrorContext,
9322 "PP: Parsing XML Decl\n");
9323#endif
9324 xmlParseXMLDecl(ctxt);
9325 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9326 /*
9327 * The XML REC instructs us to stop parsing right
9328 * here
9329 */
9330 ctxt->instate = XML_PARSER_EOF;
9331 return(0);
9332 }
9333 ctxt->standalone = ctxt->input->standalone;
9334 if ((ctxt->encoding == NULL) &&
9335 (ctxt->input->encoding != NULL))
9336 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9337 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9338 (!ctxt->disableSAX))
9339 ctxt->sax->startDocument(ctxt->userData);
9340 ctxt->instate = XML_PARSER_MISC;
9341#ifdef DEBUG_PUSH
9342 xmlGenericError(xmlGenericErrorContext,
9343 "PP: entering MISC\n");
9344#endif
9345 } else {
9346 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9347 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9348 (!ctxt->disableSAX))
9349 ctxt->sax->startDocument(ctxt->userData);
9350 ctxt->instate = XML_PARSER_MISC;
9351#ifdef DEBUG_PUSH
9352 xmlGenericError(xmlGenericErrorContext,
9353 "PP: entering MISC\n");
9354#endif
9355 }
9356 } else {
9357 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9358 ctxt->sax->setDocumentLocator(ctxt->userData,
9359 &xmlDefaultSAXLocator);
9360 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +00009361 if (ctxt->version == NULL) {
9362 xmlErrMemory(ctxt, NULL);
9363 break;
9364 }
Owen Taylor3473f882001-02-23 17:55:21 +00009365 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9366 (!ctxt->disableSAX))
9367 ctxt->sax->startDocument(ctxt->userData);
9368 ctxt->instate = XML_PARSER_MISC;
9369#ifdef DEBUG_PUSH
9370 xmlGenericError(xmlGenericErrorContext,
9371 "PP: entering MISC\n");
9372#endif
9373 }
9374 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009375 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009376 const xmlChar *name;
9377 const xmlChar *prefix;
9378 const xmlChar *URI;
9379 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +00009380
9381 if ((avail < 2) && (ctxt->inputNr == 1))
9382 goto done;
9383 cur = ctxt->input->cur[0];
9384 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009385 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +00009386 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009387 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9388 ctxt->sax->endDocument(ctxt->userData);
9389 goto done;
9390 }
9391 if (!terminate) {
9392 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +00009393 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +00009394 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009395 goto done;
9396 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9397 goto done;
9398 }
9399 }
9400 if (ctxt->spaceNr == 0)
9401 spacePush(ctxt, -1);
9402 else
9403 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +00009404#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009405 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009406#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009407 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009408#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009409 else
9410 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009411#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009412 if (name == NULL) {
9413 spacePop(ctxt);
9414 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +00009415 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9416 ctxt->sax->endDocument(ctxt->userData);
9417 goto done;
9418 }
Daniel Veillard4432df22003-09-28 18:58:27 +00009419#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +00009420 /*
9421 * [ VC: Root Element Type ]
9422 * The Name in the document type declaration must match
9423 * the element type of the root element.
9424 */
9425 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9426 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9427 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009428#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009429
9430 /*
9431 * Check for an Empty Element.
9432 */
9433 if ((RAW == '/') && (NXT(1) == '>')) {
9434 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009435
9436 if (ctxt->sax2) {
9437 if ((ctxt->sax != NULL) &&
9438 (ctxt->sax->endElementNs != NULL) &&
9439 (!ctxt->disableSAX))
9440 ctxt->sax->endElementNs(ctxt->userData, name,
9441 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +00009442 if (ctxt->nsNr - nsNr > 0)
9443 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009444#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009445 } else {
9446 if ((ctxt->sax != NULL) &&
9447 (ctxt->sax->endElement != NULL) &&
9448 (!ctxt->disableSAX))
9449 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009450#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +00009451 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452 spacePop(ctxt);
9453 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009454 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009455 } else {
9456 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009457 }
9458 break;
9459 }
9460 if (RAW == '>') {
9461 NEXT;
9462 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009463 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +00009464 "Couldn't find end of Start Tag %s\n",
9465 name);
Daniel Veillarda880b122003-04-21 21:36:41 +00009466 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009467 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00009468 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 if (ctxt->sax2)
9470 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +00009471#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00009472 else
9473 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009474#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009475
Daniel Veillarda880b122003-04-21 21:36:41 +00009476 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009477 break;
9478 }
9479 case XML_PARSER_CONTENT: {
9480 const xmlChar *test;
9481 unsigned int cons;
9482 if ((avail < 2) && (ctxt->inputNr == 1))
9483 goto done;
9484 cur = ctxt->input->cur[0];
9485 next = ctxt->input->cur[1];
9486
9487 test = CUR_PTR;
9488 cons = ctxt->input->consumed;
9489 if ((cur == '<') && (next == '/')) {
9490 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009491 break;
9492 } else if ((cur == '<') && (next == '?')) {
9493 if ((!terminate) &&
9494 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9495 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009496 xmlParsePI(ctxt);
9497 } else if ((cur == '<') && (next != '!')) {
9498 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009499 break;
9500 } else if ((cur == '<') && (next == '!') &&
9501 (ctxt->input->cur[2] == '-') &&
9502 (ctxt->input->cur[3] == '-')) {
9503 if ((!terminate) &&
9504 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9505 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009506 xmlParseComment(ctxt);
9507 ctxt->instate = XML_PARSER_CONTENT;
9508 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9509 (ctxt->input->cur[2] == '[') &&
9510 (ctxt->input->cur[3] == 'C') &&
9511 (ctxt->input->cur[4] == 'D') &&
9512 (ctxt->input->cur[5] == 'A') &&
9513 (ctxt->input->cur[6] == 'T') &&
9514 (ctxt->input->cur[7] == 'A') &&
9515 (ctxt->input->cur[8] == '[')) {
9516 SKIP(9);
9517 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +00009518 break;
9519 } else if ((cur == '<') && (next == '!') &&
9520 (avail < 9)) {
9521 goto done;
9522 } else if (cur == '&') {
9523 if ((!terminate) &&
9524 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
9525 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +00009526 xmlParseReference(ctxt);
9527 } else {
9528 /* TODO Avoid the extra copy, handle directly !!! */
9529 /*
9530 * Goal of the following test is:
9531 * - minimize calls to the SAX 'character' callback
9532 * when they are mergeable
9533 * - handle an problem for isBlank when we only parse
9534 * a sequence of blank chars and the next one is
9535 * not available to check against '<' presence.
9536 * - tries to homogenize the differences in SAX
9537 * callbacks between the push and pull versions
9538 * of the parser.
9539 */
9540 if ((ctxt->inputNr == 1) &&
9541 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
9542 if (!terminate) {
9543 if (ctxt->progressive) {
9544 if ((lastlt == NULL) ||
9545 (ctxt->input->cur > lastlt))
9546 goto done;
9547 } else if (xmlParseLookupSequence(ctxt,
9548 '<', 0, 0) < 0) {
9549 goto done;
9550 }
9551 }
9552 }
9553 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +00009554 xmlParseCharData(ctxt, 0);
9555 }
9556 /*
9557 * Pop-up of finished entities.
9558 */
9559 while ((RAW == 0) && (ctxt->inputNr > 1))
9560 xmlPopInput(ctxt);
9561 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009562 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9563 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +00009564 ctxt->instate = XML_PARSER_EOF;
9565 break;
9566 }
9567 break;
9568 }
9569 case XML_PARSER_END_TAG:
9570 if (avail < 2)
9571 goto done;
9572 if (!terminate) {
9573 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +00009574 /* > can be found unescaped in attribute values */
9575 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +00009576 goto done;
9577 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
9578 goto done;
9579 }
9580 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009581 if (ctxt->sax2) {
9582 xmlParseEndTag2(ctxt,
9583 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
9584 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009585 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009586 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009587 }
9588#ifdef LIBXML_SAX1_ENABLED
9589 else
Daniel Veillarde57ec792003-09-10 10:50:59 +00009590 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +00009591#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009592 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +00009593 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009594 } else {
9595 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +00009596 }
9597 break;
9598 case XML_PARSER_CDATA_SECTION: {
9599 /*
9600 * The Push mode need to have the SAX callback for
9601 * cdataBlock merge back contiguous callbacks.
9602 */
9603 int base;
9604
9605 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
9606 if (base < 0) {
9607 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
9608 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9609 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009610 ctxt->sax->cdataBlock(ctxt->userData,
9611 ctxt->input->cur,
9612 XML_PARSER_BIG_BUFFER_SIZE);
9613 else if (ctxt->sax->characters != NULL)
9614 ctxt->sax->characters(ctxt->userData,
9615 ctxt->input->cur,
Daniel Veillarda880b122003-04-21 21:36:41 +00009616 XML_PARSER_BIG_BUFFER_SIZE);
9617 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009618 SKIPL(XML_PARSER_BIG_BUFFER_SIZE);
Daniel Veillarda880b122003-04-21 21:36:41 +00009619 ctxt->checkIndex = 0;
9620 }
9621 goto done;
9622 } else {
9623 if ((ctxt->sax != NULL) && (base > 0) &&
9624 (!ctxt->disableSAX)) {
9625 if (ctxt->sax->cdataBlock != NULL)
9626 ctxt->sax->cdataBlock(ctxt->userData,
9627 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +00009628 else if (ctxt->sax->characters != NULL)
9629 ctxt->sax->characters(ctxt->userData,
9630 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +00009631 }
Daniel Veillard0b787f32004-03-26 17:29:53 +00009632 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +00009633 ctxt->checkIndex = 0;
9634 ctxt->instate = XML_PARSER_CONTENT;
9635#ifdef DEBUG_PUSH
9636 xmlGenericError(xmlGenericErrorContext,
9637 "PP: entering CONTENT\n");
9638#endif
9639 }
9640 break;
9641 }
Owen Taylor3473f882001-02-23 17:55:21 +00009642 case XML_PARSER_MISC:
9643 SKIP_BLANKS;
9644 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +00009645 avail = ctxt->input->length -
9646 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009647 else
Daniel Veillarda880b122003-04-21 21:36:41 +00009648 avail = ctxt->input->buf->buffer->use -
9649 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +00009650 if (avail < 2)
9651 goto done;
9652 cur = ctxt->input->cur[0];
9653 next = ctxt->input->cur[1];
9654 if ((cur == '<') && (next == '?')) {
9655 if ((!terminate) &&
9656 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9657 goto done;
9658#ifdef DEBUG_PUSH
9659 xmlGenericError(xmlGenericErrorContext,
9660 "PP: Parsing PI\n");
9661#endif
9662 xmlParsePI(ctxt);
9663 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009664 (ctxt->input->cur[2] == '-') &&
9665 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009666 if ((!terminate) &&
9667 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9668 goto done;
9669#ifdef DEBUG_PUSH
9670 xmlGenericError(xmlGenericErrorContext,
9671 "PP: Parsing Comment\n");
9672#endif
9673 xmlParseComment(ctxt);
9674 ctxt->instate = XML_PARSER_MISC;
9675 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +00009676 (ctxt->input->cur[2] == 'D') &&
9677 (ctxt->input->cur[3] == 'O') &&
9678 (ctxt->input->cur[4] == 'C') &&
9679 (ctxt->input->cur[5] == 'T') &&
9680 (ctxt->input->cur[6] == 'Y') &&
9681 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009682 (ctxt->input->cur[8] == 'E')) {
9683 if ((!terminate) &&
9684 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
9685 goto done;
9686#ifdef DEBUG_PUSH
9687 xmlGenericError(xmlGenericErrorContext,
9688 "PP: Parsing internal subset\n");
9689#endif
9690 ctxt->inSubset = 1;
9691 xmlParseDocTypeDecl(ctxt);
9692 if (RAW == '[') {
9693 ctxt->instate = XML_PARSER_DTD;
9694#ifdef DEBUG_PUSH
9695 xmlGenericError(xmlGenericErrorContext,
9696 "PP: entering DTD\n");
9697#endif
9698 } else {
9699 /*
9700 * Create and update the external subset.
9701 */
9702 ctxt->inSubset = 2;
9703 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9704 (ctxt->sax->externalSubset != NULL))
9705 ctxt->sax->externalSubset(ctxt->userData,
9706 ctxt->intSubName, ctxt->extSubSystem,
9707 ctxt->extSubURI);
9708 ctxt->inSubset = 0;
9709 ctxt->instate = XML_PARSER_PROLOG;
9710#ifdef DEBUG_PUSH
9711 xmlGenericError(xmlGenericErrorContext,
9712 "PP: entering PROLOG\n");
9713#endif
9714 }
9715 } else if ((cur == '<') && (next == '!') &&
9716 (avail < 9)) {
9717 goto done;
9718 } else {
9719 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +00009720 ctxt->progressive = 1;
9721 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009722#ifdef DEBUG_PUSH
9723 xmlGenericError(xmlGenericErrorContext,
9724 "PP: entering START_TAG\n");
9725#endif
9726 }
9727 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009728 case XML_PARSER_PROLOG:
9729 SKIP_BLANKS;
9730 if (ctxt->input->buf == NULL)
9731 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9732 else
9733 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9734 if (avail < 2)
9735 goto done;
9736 cur = ctxt->input->cur[0];
9737 next = ctxt->input->cur[1];
9738 if ((cur == '<') && (next == '?')) {
9739 if ((!terminate) &&
9740 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9741 goto done;
9742#ifdef DEBUG_PUSH
9743 xmlGenericError(xmlGenericErrorContext,
9744 "PP: Parsing PI\n");
9745#endif
9746 xmlParsePI(ctxt);
9747 } else if ((cur == '<') && (next == '!') &&
9748 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9749 if ((!terminate) &&
9750 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9751 goto done;
9752#ifdef DEBUG_PUSH
9753 xmlGenericError(xmlGenericErrorContext,
9754 "PP: Parsing Comment\n");
9755#endif
9756 xmlParseComment(ctxt);
9757 ctxt->instate = XML_PARSER_PROLOG;
9758 } else if ((cur == '<') && (next == '!') &&
9759 (avail < 4)) {
9760 goto done;
9761 } else {
9762 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00009763 if (ctxt->progressive == 0)
9764 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +00009765 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +00009766#ifdef DEBUG_PUSH
9767 xmlGenericError(xmlGenericErrorContext,
9768 "PP: entering START_TAG\n");
9769#endif
9770 }
9771 break;
9772 case XML_PARSER_EPILOG:
9773 SKIP_BLANKS;
9774 if (ctxt->input->buf == NULL)
9775 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
9776 else
9777 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
9778 if (avail < 2)
9779 goto done;
9780 cur = ctxt->input->cur[0];
9781 next = ctxt->input->cur[1];
9782 if ((cur == '<') && (next == '?')) {
9783 if ((!terminate) &&
9784 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
9785 goto done;
9786#ifdef DEBUG_PUSH
9787 xmlGenericError(xmlGenericErrorContext,
9788 "PP: Parsing PI\n");
9789#endif
9790 xmlParsePI(ctxt);
9791 ctxt->instate = XML_PARSER_EPILOG;
9792 } else if ((cur == '<') && (next == '!') &&
9793 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
9794 if ((!terminate) &&
9795 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
9796 goto done;
9797#ifdef DEBUG_PUSH
9798 xmlGenericError(xmlGenericErrorContext,
9799 "PP: Parsing Comment\n");
9800#endif
9801 xmlParseComment(ctxt);
9802 ctxt->instate = XML_PARSER_EPILOG;
9803 } else if ((cur == '<') && (next == '!') &&
9804 (avail < 4)) {
9805 goto done;
9806 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009807 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009808 ctxt->instate = XML_PARSER_EOF;
9809#ifdef DEBUG_PUSH
9810 xmlGenericError(xmlGenericErrorContext,
9811 "PP: entering EOF\n");
9812#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00009813 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00009814 ctxt->sax->endDocument(ctxt->userData);
9815 goto done;
9816 }
9817 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009818 case XML_PARSER_DTD: {
9819 /*
9820 * Sorry but progressive parsing of the internal subset
9821 * is not expected to be supported. We first check that
9822 * the full content of the internal subset is available and
9823 * the parsing is launched only at that point.
9824 * Internal subset ends up with "']' S? '>'" in an unescaped
9825 * section and not in a ']]>' sequence which are conditional
9826 * sections (whoever argued to keep that crap in XML deserve
9827 * a place in hell !).
9828 */
9829 int base, i;
9830 xmlChar *buf;
9831 xmlChar quote = 0;
9832
9833 base = ctxt->input->cur - ctxt->input->base;
9834 if (base < 0) return(0);
9835 if (ctxt->checkIndex > base)
9836 base = ctxt->checkIndex;
9837 buf = ctxt->input->buf->buffer->content;
9838 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
9839 base++) {
9840 if (quote != 0) {
9841 if (buf[base] == quote)
9842 quote = 0;
9843 continue;
9844 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009845 if ((quote == 0) && (buf[base] == '<')) {
9846 int found = 0;
9847 /* special handling of comments */
9848 if (((unsigned int) base + 4 <
9849 ctxt->input->buf->buffer->use) &&
9850 (buf[base + 1] == '!') &&
9851 (buf[base + 2] == '-') &&
9852 (buf[base + 3] == '-')) {
9853 for (;(unsigned int) base + 3 <
9854 ctxt->input->buf->buffer->use; base++) {
9855 if ((buf[base] == '-') &&
9856 (buf[base + 1] == '-') &&
9857 (buf[base + 2] == '>')) {
9858 found = 1;
9859 base += 2;
9860 break;
9861 }
9862 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009863 if (!found) {
9864#if 0
9865 fprintf(stderr, "unfinished comment\n");
9866#endif
9867 break; /* for */
9868 }
Daniel Veillard036143b2004-02-12 11:57:52 +00009869 continue;
9870 }
9871 }
Owen Taylor3473f882001-02-23 17:55:21 +00009872 if (buf[base] == '"') {
9873 quote = '"';
9874 continue;
9875 }
9876 if (buf[base] == '\'') {
9877 quote = '\'';
9878 continue;
9879 }
9880 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009881#if 0
9882 fprintf(stderr, "%c%c%c%c: ", buf[base],
9883 buf[base + 1], buf[base + 2], buf[base + 3]);
9884#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009885 if ((unsigned int) base +1 >=
9886 ctxt->input->buf->buffer->use)
9887 break;
9888 if (buf[base + 1] == ']') {
9889 /* conditional crap, skip both ']' ! */
9890 base++;
9891 continue;
9892 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009893 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00009894 (unsigned int) base + i < ctxt->input->buf->buffer->use;
9895 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009896 if (buf[base + i] == '>') {
9897#if 0
9898 fprintf(stderr, "found\n");
9899#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009900 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009901 }
9902 if (!IS_BLANK_CH(buf[base + i])) {
9903#if 0
9904 fprintf(stderr, "not found\n");
9905#endif
9906 goto not_end_of_int_subset;
9907 }
Owen Taylor3473f882001-02-23 17:55:21 +00009908 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009909#if 0
9910 fprintf(stderr, "end of stream\n");
9911#endif
Owen Taylor3473f882001-02-23 17:55:21 +00009912 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009913
Owen Taylor3473f882001-02-23 17:55:21 +00009914 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +00009915not_end_of_int_subset:
9916 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +00009917 }
9918 /*
9919 * We didn't found the end of the Internal subset
9920 */
Owen Taylor3473f882001-02-23 17:55:21 +00009921#ifdef DEBUG_PUSH
9922 if (next == 0)
9923 xmlGenericError(xmlGenericErrorContext,
9924 "PP: lookup of int subset end filed\n");
9925#endif
9926 goto done;
9927
9928found_end_int_subset:
9929 xmlParseInternalSubset(ctxt);
9930 ctxt->inSubset = 2;
9931 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9932 (ctxt->sax->externalSubset != NULL))
9933 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9934 ctxt->extSubSystem, ctxt->extSubURI);
9935 ctxt->inSubset = 0;
9936 ctxt->instate = XML_PARSER_PROLOG;
9937 ctxt->checkIndex = 0;
9938#ifdef DEBUG_PUSH
9939 xmlGenericError(xmlGenericErrorContext,
9940 "PP: entering PROLOG\n");
9941#endif
9942 break;
9943 }
9944 case XML_PARSER_COMMENT:
9945 xmlGenericError(xmlGenericErrorContext,
9946 "PP: internal error, state == COMMENT\n");
9947 ctxt->instate = XML_PARSER_CONTENT;
9948#ifdef DEBUG_PUSH
9949 xmlGenericError(xmlGenericErrorContext,
9950 "PP: entering CONTENT\n");
9951#endif
9952 break;
Daniel Veillarda880b122003-04-21 21:36:41 +00009953 case XML_PARSER_IGNORE:
9954 xmlGenericError(xmlGenericErrorContext,
9955 "PP: internal error, state == IGNORE");
9956 ctxt->instate = XML_PARSER_DTD;
9957#ifdef DEBUG_PUSH
9958 xmlGenericError(xmlGenericErrorContext,
9959 "PP: entering DTD\n");
9960#endif
9961 break;
Owen Taylor3473f882001-02-23 17:55:21 +00009962 case XML_PARSER_PI:
9963 xmlGenericError(xmlGenericErrorContext,
9964 "PP: internal error, state == PI\n");
9965 ctxt->instate = XML_PARSER_CONTENT;
9966#ifdef DEBUG_PUSH
9967 xmlGenericError(xmlGenericErrorContext,
9968 "PP: entering CONTENT\n");
9969#endif
9970 break;
9971 case XML_PARSER_ENTITY_DECL:
9972 xmlGenericError(xmlGenericErrorContext,
9973 "PP: internal error, state == ENTITY_DECL\n");
9974 ctxt->instate = XML_PARSER_DTD;
9975#ifdef DEBUG_PUSH
9976 xmlGenericError(xmlGenericErrorContext,
9977 "PP: entering DTD\n");
9978#endif
9979 break;
9980 case XML_PARSER_ENTITY_VALUE:
9981 xmlGenericError(xmlGenericErrorContext,
9982 "PP: internal error, state == ENTITY_VALUE\n");
9983 ctxt->instate = XML_PARSER_CONTENT;
9984#ifdef DEBUG_PUSH
9985 xmlGenericError(xmlGenericErrorContext,
9986 "PP: entering DTD\n");
9987#endif
9988 break;
9989 case XML_PARSER_ATTRIBUTE_VALUE:
9990 xmlGenericError(xmlGenericErrorContext,
9991 "PP: internal error, state == ATTRIBUTE_VALUE\n");
9992 ctxt->instate = XML_PARSER_START_TAG;
9993#ifdef DEBUG_PUSH
9994 xmlGenericError(xmlGenericErrorContext,
9995 "PP: entering START_TAG\n");
9996#endif
9997 break;
9998 case XML_PARSER_SYSTEM_LITERAL:
9999 xmlGenericError(xmlGenericErrorContext,
10000 "PP: internal error, state == SYSTEM_LITERAL\n");
10001 ctxt->instate = XML_PARSER_START_TAG;
10002#ifdef DEBUG_PUSH
10003 xmlGenericError(xmlGenericErrorContext,
10004 "PP: entering START_TAG\n");
10005#endif
10006 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000010007 case XML_PARSER_PUBLIC_LITERAL:
10008 xmlGenericError(xmlGenericErrorContext,
10009 "PP: internal error, state == PUBLIC_LITERAL\n");
10010 ctxt->instate = XML_PARSER_START_TAG;
10011#ifdef DEBUG_PUSH
10012 xmlGenericError(xmlGenericErrorContext,
10013 "PP: entering START_TAG\n");
10014#endif
10015 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010016 }
10017 }
10018done:
10019#ifdef DEBUG_PUSH
10020 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
10021#endif
10022 return(ret);
10023}
10024
10025/**
Owen Taylor3473f882001-02-23 17:55:21 +000010026 * xmlParseChunk:
10027 * @ctxt: an XML parser context
10028 * @chunk: an char array
10029 * @size: the size in byte of the chunk
10030 * @terminate: last chunk indicator
10031 *
10032 * Parse a Chunk of memory
10033 *
10034 * Returns zero if no error, the xmlParserErrors otherwise.
10035 */
10036int
10037xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10038 int terminate) {
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010039 if (ctxt == NULL)
10040 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000010041 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010042 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000010043 if (ctxt->instate == XML_PARSER_START)
10044 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010045 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10046 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10047 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10048 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000010049 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000010050
William M. Bracka3215c72004-07-31 16:24:01 +000010051 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10052 if (res < 0) {
10053 ctxt->errNo = XML_PARSER_EOF;
10054 ctxt->disableSAX = 1;
10055 return (XML_PARSER_EOF);
10056 }
Owen Taylor3473f882001-02-23 17:55:21 +000010057 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10058 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010059 ctxt->input->end =
10060 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010061#ifdef DEBUG_PUSH
10062 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10063#endif
10064
Owen Taylor3473f882001-02-23 17:55:21 +000010065 } else if (ctxt->instate != XML_PARSER_EOF) {
10066 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
10067 xmlParserInputBufferPtr in = ctxt->input->buf;
10068 if ((in->encoder != NULL) && (in->buffer != NULL) &&
10069 (in->raw != NULL)) {
10070 int nbchars;
10071
10072 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
10073 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010074 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000010075 xmlGenericError(xmlGenericErrorContext,
10076 "xmlParseChunk: encoder error\n");
10077 return(XML_ERR_INVALID_ENCODING);
10078 }
10079 }
10080 }
10081 }
10082 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillard14412512005-01-21 23:53:26 +000010083 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010084 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000010085 if (terminate) {
10086 /*
10087 * Check for termination
10088 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010089 int avail = 0;
10090
10091 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010092 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010093 avail = ctxt->input->length -
10094 (ctxt->input->cur - ctxt->input->base);
10095 else
10096 avail = ctxt->input->buf->buffer->use -
10097 (ctxt->input->cur - ctxt->input->base);
10098 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010099
Owen Taylor3473f882001-02-23 17:55:21 +000010100 if ((ctxt->instate != XML_PARSER_EOF) &&
10101 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010102 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010103 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010104 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010105 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000010106 }
Owen Taylor3473f882001-02-23 17:55:21 +000010107 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010108 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010109 ctxt->sax->endDocument(ctxt->userData);
10110 }
10111 ctxt->instate = XML_PARSER_EOF;
10112 }
10113 return((xmlParserErrors) ctxt->errNo);
10114}
10115
10116/************************************************************************
10117 * *
10118 * I/O front end functions to the parser *
10119 * *
10120 ************************************************************************/
10121
10122/**
10123 * xmlStopParser:
10124 * @ctxt: an XML parser context
10125 *
10126 * Blocks further parser processing
10127 */
10128void
10129xmlStopParser(xmlParserCtxtPtr ctxt) {
Daniel Veillard157fee02003-10-31 10:36:03 +000010130 if (ctxt == NULL)
10131 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010132 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard157fee02003-10-31 10:36:03 +000010133 ctxt->disableSAX = 1;
William M. Brack230c5502004-12-20 16:18:49 +000010134 if (ctxt->input != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010135 ctxt->input->cur = BAD_CAST"";
William M. Brack230c5502004-12-20 16:18:49 +000010136 ctxt->input->base = ctxt->input->cur;
10137 }
Owen Taylor3473f882001-02-23 17:55:21 +000010138}
10139
10140/**
10141 * xmlCreatePushParserCtxt:
10142 * @sax: a SAX handler
10143 * @user_data: The user data returned on SAX callbacks
10144 * @chunk: a pointer to an array of chars
10145 * @size: number of chars in the array
10146 * @filename: an optional file name or URI
10147 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000010148 * Create a parser context for using the XML parser in push mode.
10149 * If @buffer and @size are non-NULL, the data is used to detect
10150 * the encoding. The remaining characters will be parsed so they
10151 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000010152 * To allow content encoding detection, @size should be >= 4
10153 * The value of @filename is used for fetching external entities
10154 * and error/warning reports.
10155 *
10156 * Returns the new parser context or NULL
10157 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000010158
Owen Taylor3473f882001-02-23 17:55:21 +000010159xmlParserCtxtPtr
10160xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10161 const char *chunk, int size, const char *filename) {
10162 xmlParserCtxtPtr ctxt;
10163 xmlParserInputPtr inputStream;
10164 xmlParserInputBufferPtr buf;
10165 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10166
10167 /*
10168 * plug some encoding conversion routines
10169 */
10170 if ((chunk != NULL) && (size >= 4))
10171 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
10172
10173 buf = xmlAllocParserInputBuffer(enc);
10174 if (buf == NULL) return(NULL);
10175
10176 ctxt = xmlNewParserCtxt();
10177 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000010178 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010179 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010180 return(NULL);
10181 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010182 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010183 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
10184 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010185 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010186 xmlFreeParserInputBuffer(buf);
10187 xmlFreeParserCtxt(ctxt);
10188 return(NULL);
10189 }
Owen Taylor3473f882001-02-23 17:55:21 +000010190 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010191#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010192 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010193#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010194 xmlFree(ctxt->sax);
10195 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10196 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010197 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010198 xmlFreeParserInputBuffer(buf);
10199 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010200 return(NULL);
10201 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010202 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10203 if (sax->initialized == XML_SAX2_MAGIC)
10204 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10205 else
10206 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010207 if (user_data != NULL)
10208 ctxt->userData = user_data;
10209 }
10210 if (filename == NULL) {
10211 ctxt->directory = NULL;
10212 } else {
10213 ctxt->directory = xmlParserGetDirectory(filename);
10214 }
10215
10216 inputStream = xmlNewInputStream(ctxt);
10217 if (inputStream == NULL) {
10218 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010219 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010220 return(NULL);
10221 }
10222
10223 if (filename == NULL)
10224 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000010225 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000010226 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000010227 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000010228 if (inputStream->filename == NULL) {
10229 xmlFreeParserCtxt(ctxt);
10230 xmlFreeParserInputBuffer(buf);
10231 return(NULL);
10232 }
10233 }
Owen Taylor3473f882001-02-23 17:55:21 +000010234 inputStream->buf = buf;
10235 inputStream->base = inputStream->buf->buffer->content;
10236 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010237 inputStream->end =
10238 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010239
10240 inputPush(ctxt, inputStream);
10241
William M. Brack3a1cd212005-02-11 14:35:54 +000010242 /*
10243 * If the caller didn't provide an initial 'chunk' for determining
10244 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
10245 * that it can be automatically determined later
10246 */
10247 if ((size == 0) || (chunk == NULL)) {
10248 ctxt->charset = XML_CHAR_ENCODING_NONE;
10249 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010250 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10251 int cur = ctxt->input->cur - ctxt->input->base;
10252
Owen Taylor3473f882001-02-23 17:55:21 +000010253 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000010254
10255 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10256 ctxt->input->cur = ctxt->input->base + cur;
10257 ctxt->input->end =
10258 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010259#ifdef DEBUG_PUSH
10260 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
10261#endif
10262 }
10263
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010264 if (enc != XML_CHAR_ENCODING_NONE) {
10265 xmlSwitchEncoding(ctxt, enc);
10266 }
10267
Owen Taylor3473f882001-02-23 17:55:21 +000010268 return(ctxt);
10269}
Daniel Veillard73b013f2003-09-30 12:36:01 +000010270#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010271
10272/**
10273 * xmlCreateIOParserCtxt:
10274 * @sax: a SAX handler
10275 * @user_data: The user data returned on SAX callbacks
10276 * @ioread: an I/O read function
10277 * @ioclose: an I/O close function
10278 * @ioctx: an I/O handler
10279 * @enc: the charset encoding if known
10280 *
10281 * Create a parser context for using the XML parser with an existing
10282 * I/O stream
10283 *
10284 * Returns the new parser context or NULL
10285 */
10286xmlParserCtxtPtr
10287xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10288 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10289 void *ioctx, xmlCharEncoding enc) {
10290 xmlParserCtxtPtr ctxt;
10291 xmlParserInputPtr inputStream;
10292 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000010293
10294 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010295
10296 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10297 if (buf == NULL) return(NULL);
10298
10299 ctxt = xmlNewParserCtxt();
10300 if (ctxt == NULL) {
10301 xmlFree(buf);
10302 return(NULL);
10303 }
10304 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000010305#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000010306 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000010307#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010308 xmlFree(ctxt->sax);
10309 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10310 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010312 xmlFree(ctxt);
10313 return(NULL);
10314 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000010315 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
10316 if (sax->initialized == XML_SAX2_MAGIC)
10317 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10318 else
10319 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000010320 if (user_data != NULL)
10321 ctxt->userData = user_data;
10322 }
10323
10324 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10325 if (inputStream == NULL) {
10326 xmlFreeParserCtxt(ctxt);
10327 return(NULL);
10328 }
10329 inputPush(ctxt, inputStream);
10330
10331 return(ctxt);
10332}
10333
Daniel Veillard4432df22003-09-28 18:58:27 +000010334#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010335/************************************************************************
10336 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010337 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000010338 * *
10339 ************************************************************************/
10340
10341/**
10342 * xmlIOParseDTD:
10343 * @sax: the SAX handler block or NULL
10344 * @input: an Input Buffer
10345 * @enc: the charset encoding if known
10346 *
10347 * Load and parse a DTD
10348 *
10349 * Returns the resulting xmlDtdPtr or NULL in case of error.
10350 * @input will be freed at parsing end.
10351 */
10352
10353xmlDtdPtr
10354xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
10355 xmlCharEncoding enc) {
10356 xmlDtdPtr ret = NULL;
10357 xmlParserCtxtPtr ctxt;
10358 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010359 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000010360
10361 if (input == NULL)
10362 return(NULL);
10363
10364 ctxt = xmlNewParserCtxt();
10365 if (ctxt == NULL) {
10366 return(NULL);
10367 }
10368
10369 /*
10370 * Set-up the SAX context
10371 */
10372 if (sax != NULL) {
10373 if (ctxt->sax != NULL)
10374 xmlFree(ctxt->sax);
10375 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000010376 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010378 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010379
10380 /*
10381 * generate a parser input from the I/O handler
10382 */
10383
Daniel Veillard43caefb2003-12-07 19:32:22 +000010384 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000010385 if (pinput == NULL) {
10386 if (sax != NULL) ctxt->sax = NULL;
10387 xmlFreeParserCtxt(ctxt);
10388 return(NULL);
10389 }
10390
10391 /*
10392 * plug some encoding conversion routines here.
10393 */
10394 xmlPushInput(ctxt, pinput);
Daniel Veillard43caefb2003-12-07 19:32:22 +000010395 if (enc != XML_CHAR_ENCODING_NONE) {
10396 xmlSwitchEncoding(ctxt, enc);
10397 }
Owen Taylor3473f882001-02-23 17:55:21 +000010398
10399 pinput->filename = NULL;
10400 pinput->line = 1;
10401 pinput->col = 1;
10402 pinput->base = ctxt->input->cur;
10403 pinput->cur = ctxt->input->cur;
10404 pinput->free = NULL;
10405
10406 /*
10407 * let's parse that entity knowing it's an external subset.
10408 */
10409 ctxt->inSubset = 2;
10410 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10411 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10412 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000010413
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010414 if ((enc == XML_CHAR_ENCODING_NONE) &&
10415 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000010416 /*
10417 * Get the 4 first bytes and decode the charset
10418 * if enc != XML_CHAR_ENCODING_NONE
10419 * plug some encoding conversion routines.
10420 */
10421 start[0] = RAW;
10422 start[1] = NXT(1);
10423 start[2] = NXT(2);
10424 start[3] = NXT(3);
10425 enc = xmlDetectCharEncoding(start, 4);
10426 if (enc != XML_CHAR_ENCODING_NONE) {
10427 xmlSwitchEncoding(ctxt, enc);
10428 }
10429 }
10430
Owen Taylor3473f882001-02-23 17:55:21 +000010431 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
10432
10433 if (ctxt->myDoc != NULL) {
10434 if (ctxt->wellFormed) {
10435 ret = ctxt->myDoc->extSubset;
10436 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000010437 if (ret != NULL) {
10438 xmlNodePtr tmp;
10439
10440 ret->doc = NULL;
10441 tmp = ret->children;
10442 while (tmp != NULL) {
10443 tmp->doc = NULL;
10444 tmp = tmp->next;
10445 }
10446 }
Owen Taylor3473f882001-02-23 17:55:21 +000010447 } else {
10448 ret = NULL;
10449 }
10450 xmlFreeDoc(ctxt->myDoc);
10451 ctxt->myDoc = NULL;
10452 }
10453 if (sax != NULL) ctxt->sax = NULL;
10454 xmlFreeParserCtxt(ctxt);
10455
10456 return(ret);
10457}
10458
10459/**
10460 * xmlSAXParseDTD:
10461 * @sax: the SAX handler block
10462 * @ExternalID: a NAME* containing the External ID of the DTD
10463 * @SystemID: a NAME* containing the URL to the DTD
10464 *
10465 * Load and parse an external subset.
10466 *
10467 * Returns the resulting xmlDtdPtr or NULL in case of error.
10468 */
10469
10470xmlDtdPtr
10471xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10472 const xmlChar *SystemID) {
10473 xmlDtdPtr ret = NULL;
10474 xmlParserCtxtPtr ctxt;
10475 xmlParserInputPtr input = NULL;
10476 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010477 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000010478
10479 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10480
10481 ctxt = xmlNewParserCtxt();
10482 if (ctxt == NULL) {
10483 return(NULL);
10484 }
10485
10486 /*
10487 * Set-up the SAX context
10488 */
10489 if (sax != NULL) {
10490 if (ctxt->sax != NULL)
10491 xmlFree(ctxt->sax);
10492 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000010493 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000010494 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010495
10496 /*
10497 * Canonicalise the system ID
10498 */
10499 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000010500 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010501 xmlFreeParserCtxt(ctxt);
10502 return(NULL);
10503 }
Owen Taylor3473f882001-02-23 17:55:21 +000010504
10505 /*
10506 * Ask the Entity resolver to load the damn thing
10507 */
10508
10509 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010510 input = ctxt->sax->resolveEntity(ctxt, ExternalID, systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010511 if (input == NULL) {
10512 if (sax != NULL) ctxt->sax = NULL;
10513 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000010514 if (systemIdCanonic != NULL)
10515 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010516 return(NULL);
10517 }
10518
10519 /*
10520 * plug some encoding conversion routines here.
10521 */
10522 xmlPushInput(ctxt, input);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010523 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10524 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
10525 xmlSwitchEncoding(ctxt, enc);
10526 }
Owen Taylor3473f882001-02-23 17:55:21 +000010527
10528 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000010529 input->filename = (char *) systemIdCanonic;
10530 else
10531 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000010532 input->line = 1;
10533 input->col = 1;
10534 input->base = ctxt->input->cur;
10535 input->cur = ctxt->input->cur;
10536 input->free = NULL;
10537
10538 /*
10539 * let's parse that entity knowing it's an external subset.
10540 */
10541 ctxt->inSubset = 2;
10542 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10543 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10544 ExternalID, SystemID);
10545 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10546
10547 if (ctxt->myDoc != NULL) {
10548 if (ctxt->wellFormed) {
10549 ret = ctxt->myDoc->extSubset;
10550 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000010551 if (ret != NULL) {
10552 xmlNodePtr tmp;
10553
10554 ret->doc = NULL;
10555 tmp = ret->children;
10556 while (tmp != NULL) {
10557 tmp->doc = NULL;
10558 tmp = tmp->next;
10559 }
10560 }
Owen Taylor3473f882001-02-23 17:55:21 +000010561 } else {
10562 ret = NULL;
10563 }
10564 xmlFreeDoc(ctxt->myDoc);
10565 ctxt->myDoc = NULL;
10566 }
10567 if (sax != NULL) ctxt->sax = NULL;
10568 xmlFreeParserCtxt(ctxt);
10569
10570 return(ret);
10571}
10572
Daniel Veillard4432df22003-09-28 18:58:27 +000010573
Owen Taylor3473f882001-02-23 17:55:21 +000010574/**
10575 * xmlParseDTD:
10576 * @ExternalID: a NAME* containing the External ID of the DTD
10577 * @SystemID: a NAME* containing the URL to the DTD
10578 *
10579 * Load and parse an external subset.
10580 *
10581 * Returns the resulting xmlDtdPtr or NULL in case of error.
10582 */
10583
10584xmlDtdPtr
10585xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
10586 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10587}
Daniel Veillard4432df22003-09-28 18:58:27 +000010588#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010589
10590/************************************************************************
10591 * *
10592 * Front ends when parsing an Entity *
10593 * *
10594 ************************************************************************/
10595
10596/**
Owen Taylor3473f882001-02-23 17:55:21 +000010597 * xmlParseCtxtExternalEntity:
10598 * @ctx: the existing parsing context
10599 * @URL: the URL for the entity to load
10600 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010601 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000010602 *
10603 * Parse an external general entity within an existing parsing context
10604 * An external general parsed entity is well-formed if it matches the
10605 * production labeled extParsedEnt.
10606 *
10607 * [78] extParsedEnt ::= TextDecl? content
10608 *
10609 * Returns 0 if the entity is well formed, -1 in case of args problem and
10610 * the parser error code otherwise
10611 */
10612
10613int
10614xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010615 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000010616 xmlParserCtxtPtr ctxt;
10617 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010618 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010619 xmlSAXHandlerPtr oldsax = NULL;
10620 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010621 xmlChar start[4];
10622 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010623
Daniel Veillardce682bc2004-11-05 17:22:25 +000010624 if (ctx == NULL) return(-1);
10625
Owen Taylor3473f882001-02-23 17:55:21 +000010626 if (ctx->depth > 40) {
10627 return(XML_ERR_ENTITY_LOOP);
10628 }
10629
Daniel Veillardcda96922001-08-21 10:56:31 +000010630 if (lst != NULL)
10631 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010632 if ((URL == NULL) && (ID == NULL))
10633 return(-1);
10634 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10635 return(-1);
10636
10637
10638 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
10639 if (ctxt == NULL) return(-1);
10640 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000010641 ctxt->_private = ctx->_private;
Owen Taylor3473f882001-02-23 17:55:21 +000010642 oldsax = ctxt->sax;
10643 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010644 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010645 newDoc = xmlNewDoc(BAD_CAST "1.0");
10646 if (newDoc == NULL) {
10647 xmlFreeParserCtxt(ctxt);
10648 return(-1);
10649 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010650 if (ctx->myDoc->dict) {
10651 newDoc->dict = ctx->myDoc->dict;
10652 xmlDictReference(newDoc->dict);
10653 }
Owen Taylor3473f882001-02-23 17:55:21 +000010654 if (ctx->myDoc != NULL) {
10655 newDoc->intSubset = ctx->myDoc->intSubset;
10656 newDoc->extSubset = ctx->myDoc->extSubset;
10657 }
10658 if (ctx->myDoc->URL != NULL) {
10659 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10660 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010661 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10662 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010663 ctxt->sax = oldsax;
10664 xmlFreeParserCtxt(ctxt);
10665 newDoc->intSubset = NULL;
10666 newDoc->extSubset = NULL;
10667 xmlFreeDoc(newDoc);
10668 return(-1);
10669 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010670 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010671 nodePush(ctxt, newDoc->children);
10672 if (ctx->myDoc == NULL) {
10673 ctxt->myDoc = newDoc;
10674 } else {
10675 ctxt->myDoc = ctx->myDoc;
10676 newDoc->children->doc = ctx->myDoc;
10677 }
10678
Daniel Veillard87a764e2001-06-20 17:41:10 +000010679 /*
10680 * Get the 4 first bytes and decode the charset
10681 * if enc != XML_CHAR_ENCODING_NONE
10682 * plug some encoding conversion routines.
10683 */
10684 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010685 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10686 start[0] = RAW;
10687 start[1] = NXT(1);
10688 start[2] = NXT(2);
10689 start[3] = NXT(3);
10690 enc = xmlDetectCharEncoding(start, 4);
10691 if (enc != XML_CHAR_ENCODING_NONE) {
10692 xmlSwitchEncoding(ctxt, enc);
10693 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010694 }
10695
Owen Taylor3473f882001-02-23 17:55:21 +000010696 /*
10697 * Parse a possible text declaration first
10698 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010699 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010700 xmlParseTextDecl(ctxt);
10701 }
10702
10703 /*
10704 * Doing validity checking on chunk doesn't make sense
10705 */
10706 ctxt->instate = XML_PARSER_CONTENT;
10707 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010708 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010709 ctxt->loadsubset = ctx->loadsubset;
10710 ctxt->depth = ctx->depth + 1;
10711 ctxt->replaceEntities = ctx->replaceEntities;
10712 if (ctxt->validate) {
10713 ctxt->vctxt.error = ctx->vctxt.error;
10714 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000010715 } else {
10716 ctxt->vctxt.error = NULL;
10717 ctxt->vctxt.warning = NULL;
10718 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000010719 ctxt->vctxt.nodeTab = NULL;
10720 ctxt->vctxt.nodeNr = 0;
10721 ctxt->vctxt.nodeMax = 0;
10722 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010723 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
10724 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000010725 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
10726 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
10727 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010728 ctxt->dictNames = ctx->dictNames;
10729 ctxt->attsDefault = ctx->attsDefault;
10730 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000010731 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000010732
10733 xmlParseContent(ctxt);
10734
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000010735 ctx->validate = ctxt->validate;
10736 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000010737 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010738 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010739 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010740 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010741 }
10742 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010743 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010744 }
10745
10746 if (!ctxt->wellFormed) {
10747 if (ctxt->errNo == 0)
10748 ret = 1;
10749 else
10750 ret = ctxt->errNo;
10751 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000010752 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010753 xmlNodePtr cur;
10754
10755 /*
10756 * Return the newly created nodeset after unlinking it from
10757 * they pseudo parent.
10758 */
10759 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000010760 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010761 while (cur != NULL) {
10762 cur->parent = NULL;
10763 cur = cur->next;
10764 }
10765 newDoc->children->children = NULL;
10766 }
10767 ret = 0;
10768 }
10769 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000010770 ctxt->dict = NULL;
10771 ctxt->attsDefault = NULL;
10772 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010773 xmlFreeParserCtxt(ctxt);
10774 newDoc->intSubset = NULL;
10775 newDoc->extSubset = NULL;
10776 xmlFreeDoc(newDoc);
10777
10778 return(ret);
10779}
10780
10781/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010782 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000010783 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010784 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000010785 * @sax: the SAX handler bloc (possibly NULL)
10786 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10787 * @depth: Used for loop detection, use 0
10788 * @URL: the URL for the entity to load
10789 * @ID: the System ID for the entity to load
10790 * @list: the return value for the set of parsed nodes
10791 *
Daniel Veillard257d9102001-05-08 10:41:44 +000010792 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000010793 *
10794 * Returns 0 if the entity is well formed, -1 in case of args problem and
10795 * the parser error code otherwise
10796 */
10797
Daniel Veillard7d515752003-09-26 19:12:37 +000010798static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010799xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
10800 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000010801 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010802 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000010803 xmlParserCtxtPtr ctxt;
10804 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010805 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000010806 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000010807 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000010808 xmlChar start[4];
10809 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000010810
10811 if (depth > 40) {
10812 return(XML_ERR_ENTITY_LOOP);
10813 }
10814
10815
10816
10817 if (list != NULL)
10818 *list = NULL;
10819 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000010820 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010821 if (doc == NULL) /* @@ relax but check for dereferences */
Daniel Veillard7d515752003-09-26 19:12:37 +000010822 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010823
10824
10825 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000010826 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000010827 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010828 if (oldctxt != NULL) {
10829 ctxt->_private = oldctxt->_private;
10830 ctxt->loadsubset = oldctxt->loadsubset;
10831 ctxt->validate = oldctxt->validate;
10832 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010833 ctxt->record_info = oldctxt->record_info;
10834 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
10835 ctxt->node_seq.length = oldctxt->node_seq.length;
10836 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010837 } else {
10838 /*
10839 * Doing validity checking on chunk without context
10840 * doesn't make sense
10841 */
10842 ctxt->_private = NULL;
10843 ctxt->validate = 0;
10844 ctxt->external = 2;
10845 ctxt->loadsubset = 0;
10846 }
Owen Taylor3473f882001-02-23 17:55:21 +000010847 if (sax != NULL) {
10848 oldsax = ctxt->sax;
10849 ctxt->sax = sax;
10850 if (user_data != NULL)
10851 ctxt->userData = user_data;
10852 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010853 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010854 newDoc = xmlNewDoc(BAD_CAST "1.0");
10855 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010856 ctxt->node_seq.maximum = 0;
10857 ctxt->node_seq.length = 0;
10858 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010859 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000010860 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010861 }
10862 if (doc != NULL) {
10863 newDoc->intSubset = doc->intSubset;
10864 newDoc->extSubset = doc->extSubset;
Daniel Veillard03a53c32004-10-26 16:06:51 +000010865 newDoc->dict = doc->dict;
10866 } else if (oldctxt != NULL) {
10867 newDoc->dict = oldctxt->dict;
Owen Taylor3473f882001-02-23 17:55:21 +000010868 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000010869 xmlDictReference(newDoc->dict);
10870
Owen Taylor3473f882001-02-23 17:55:21 +000010871 if (doc->URL != NULL) {
10872 newDoc->URL = xmlStrdup(doc->URL);
10873 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010874 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10875 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010876 if (sax != NULL)
10877 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010878 ctxt->node_seq.maximum = 0;
10879 ctxt->node_seq.length = 0;
10880 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010881 xmlFreeParserCtxt(ctxt);
10882 newDoc->intSubset = NULL;
10883 newDoc->extSubset = NULL;
10884 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000010885 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000010886 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010887 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000010888 nodePush(ctxt, newDoc->children);
10889 if (doc == NULL) {
10890 ctxt->myDoc = newDoc;
10891 } else {
10892 ctxt->myDoc = doc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000010893 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000010894 }
10895
Daniel Veillard87a764e2001-06-20 17:41:10 +000010896 /*
10897 * Get the 4 first bytes and decode the charset
10898 * if enc != XML_CHAR_ENCODING_NONE
10899 * plug some encoding conversion routines.
10900 */
10901 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010902 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10903 start[0] = RAW;
10904 start[1] = NXT(1);
10905 start[2] = NXT(2);
10906 start[3] = NXT(3);
10907 enc = xmlDetectCharEncoding(start, 4);
10908 if (enc != XML_CHAR_ENCODING_NONE) {
10909 xmlSwitchEncoding(ctxt, enc);
10910 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000010911 }
10912
Owen Taylor3473f882001-02-23 17:55:21 +000010913 /*
10914 * Parse a possible text declaration first
10915 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010916 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010917 xmlParseTextDecl(ctxt);
10918 }
10919
Owen Taylor3473f882001-02-23 17:55:21 +000010920 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000010921 ctxt->depth = depth;
10922
10923 xmlParseContent(ctxt);
10924
Daniel Veillard561b7f82002-03-20 21:55:57 +000010925 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000010927 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010928 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010929 }
10930 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010931 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010932 }
10933
10934 if (!ctxt->wellFormed) {
10935 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000010936 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000010937 else
William M. Brack7b9154b2003-09-27 19:23:50 +000010938 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000010939 } else {
10940 if (list != NULL) {
10941 xmlNodePtr cur;
10942
10943 /*
10944 * Return the newly created nodeset after unlinking it from
10945 * they pseudo parent.
10946 */
10947 cur = newDoc->children->children;
10948 *list = cur;
10949 while (cur != NULL) {
10950 cur->parent = NULL;
10951 cur = cur->next;
10952 }
10953 newDoc->children->children = NULL;
10954 }
Daniel Veillard7d515752003-09-26 19:12:37 +000010955 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000010956 }
10957 if (sax != NULL)
10958 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000010959 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
10960 oldctxt->node_seq.length = ctxt->node_seq.length;
10961 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000010962 ctxt->node_seq.maximum = 0;
10963 ctxt->node_seq.length = 0;
10964 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010965 xmlFreeParserCtxt(ctxt);
10966 newDoc->intSubset = NULL;
10967 newDoc->extSubset = NULL;
10968 xmlFreeDoc(newDoc);
10969
10970 return(ret);
10971}
10972
Daniel Veillard81273902003-09-30 00:43:48 +000010973#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010974/**
Daniel Veillard257d9102001-05-08 10:41:44 +000010975 * xmlParseExternalEntity:
10976 * @doc: the document the chunk pertains to
10977 * @sax: the SAX handler bloc (possibly NULL)
10978 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10979 * @depth: Used for loop detection, use 0
10980 * @URL: the URL for the entity to load
10981 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000010982 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000010983 *
10984 * Parse an external general entity
10985 * An external general parsed entity is well-formed if it matches the
10986 * production labeled extParsedEnt.
10987 *
10988 * [78] extParsedEnt ::= TextDecl? content
10989 *
10990 * Returns 0 if the entity is well formed, -1 in case of args problem and
10991 * the parser error code otherwise
10992 */
10993
10994int
10995xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000010996 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000010997 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000010998 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000010999}
11000
11001/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000011002 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000011003 * @doc: the document the chunk pertains to
11004 * @sax: the SAX handler bloc (possibly NULL)
11005 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11006 * @depth: Used for loop detection, use 0
11007 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000011008 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011009 *
11010 * Parse a well-balanced chunk of an XML document
11011 * called by the parser
11012 * The allowed sequence for the Well Balanced Chunk is the one defined by
11013 * the content production in the XML grammar:
11014 *
11015 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11016 *
11017 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11018 * the parser error code otherwise
11019 */
11020
11021int
11022xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000011023 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011024 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
11025 depth, string, lst, 0 );
11026}
Daniel Veillard81273902003-09-30 00:43:48 +000011027#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000011028
11029/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000011030 * xmlParseBalancedChunkMemoryInternal:
11031 * @oldctxt: the existing parsing context
11032 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11033 * @user_data: the user data field for the parser context
11034 * @lst: the return value for the set of parsed nodes
11035 *
11036 *
11037 * Parse a well-balanced chunk of an XML document
11038 * called by the parser
11039 * The allowed sequence for the Well Balanced Chunk is the one defined by
11040 * the content production in the XML grammar:
11041 *
11042 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11043 *
Daniel Veillard7d515752003-09-26 19:12:37 +000011044 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11045 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000011046 *
11047 * In case recover is set to 1, the nodelist will not be empty even if
11048 * the parsed chunk is not well balanced.
11049 */
Daniel Veillard7d515752003-09-26 19:12:37 +000011050static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000011051xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
11052 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
11053 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011054 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011055 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011056 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011057 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011058 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011059 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000011060 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011061
11062 if (oldctxt->depth > 40) {
11063 return(XML_ERR_ENTITY_LOOP);
11064 }
11065
11066
11067 if (lst != NULL)
11068 *lst = NULL;
11069 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000011070 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011071
11072 size = xmlStrlen(string);
11073
11074 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000011075 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011076 if (user_data != NULL)
11077 ctxt->userData = user_data;
11078 else
11079 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011080 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
11081 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000011082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011085
11086 oldsax = ctxt->sax;
11087 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011088 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011089 ctxt->replaceEntities = oldctxt->replaceEntities;
11090 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011091
Daniel Veillarde1ca5032002-12-09 14:13:43 +000011092 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011093 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011094 newDoc = xmlNewDoc(BAD_CAST "1.0");
11095 if (newDoc == NULL) {
11096 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011097 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011098 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000011099 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011100 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011101 newDoc->dict = ctxt->dict;
11102 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011103 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011104 } else {
11105 ctxt->myDoc = oldctxt->myDoc;
11106 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011107 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011108 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011109 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
11110 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011111 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011112 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000011113 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011114 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011115 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011116 }
William M. Brack7b9154b2003-09-27 19:23:50 +000011117 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011118 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011119 ctxt->myDoc->children = NULL;
11120 ctxt->myDoc->last = NULL;
11121 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000011122 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011123 ctxt->instate = XML_PARSER_CONTENT;
11124 ctxt->depth = oldctxt->depth + 1;
11125
Daniel Veillard328f48c2002-11-15 15:24:34 +000011126 ctxt->validate = 0;
11127 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000011128 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
11129 /*
11130 * ID/IDREF registration will be done in xmlValidateElement below
11131 */
11132 ctxt->loadsubset |= XML_SKIP_IDS;
11133 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011134 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011135 ctxt->attsDefault = oldctxt->attsDefault;
11136 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011137
Daniel Veillard68e9e742002-11-16 15:35:11 +000011138 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011139 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011140 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011141 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011142 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011143 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011144 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011145 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000011146 }
11147
11148 if (!ctxt->wellFormed) {
11149 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000011150 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011151 else
William M. Brack7b9154b2003-09-27 19:23:50 +000011152 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011153 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000011154 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011155 }
11156
William M. Brack7b9154b2003-09-27 19:23:50 +000011157 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000011158 xmlNodePtr cur;
11159
11160 /*
11161 * Return the newly created nodeset after unlinking it from
11162 * they pseudo parent.
11163 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000011164 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011165 *lst = cur;
11166 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000011167#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8d589042003-02-04 15:07:21 +000011168 if (oldctxt->validate && oldctxt->wellFormed &&
11169 oldctxt->myDoc && oldctxt->myDoc->intSubset) {
11170 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
11171 oldctxt->myDoc, cur);
11172 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011173#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000011174 cur->parent = NULL;
11175 cur = cur->next;
11176 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000011177 ctxt->myDoc->children->children = NULL;
11178 }
11179 if (ctxt->myDoc != NULL) {
11180 xmlFreeNode(ctxt->myDoc->children);
11181 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011182 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011183 }
11184
11185 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000011186 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000011187 ctxt->attsDefault = NULL;
11188 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000011189 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011190 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000011191 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000011192 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000011193
11194 return(ret);
11195}
11196
Daniel Veillard29b17482004-08-16 00:39:03 +000011197/**
11198 * xmlParseInNodeContext:
11199 * @node: the context node
11200 * @data: the input string
11201 * @datalen: the input string length in bytes
11202 * @options: a combination of xmlParserOption
11203 * @lst: the return value for the set of parsed nodes
11204 *
11205 * Parse a well-balanced chunk of an XML document
11206 * within the context (DTD, namespaces, etc ...) of the given node.
11207 *
11208 * The allowed sequence for the data is a Well Balanced Chunk defined by
11209 * the content production in the XML grammar:
11210 *
11211 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11212 *
11213 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
11214 * error code otherwise
11215 */
11216xmlParserErrors
11217xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
11218 int options, xmlNodePtr *lst) {
11219#ifdef SAX2
11220 xmlParserCtxtPtr ctxt;
11221 xmlDocPtr doc = NULL;
11222 xmlNodePtr fake, cur;
11223 int nsnr = 0;
11224
11225 xmlParserErrors ret = XML_ERR_OK;
11226
11227 /*
11228 * check all input parameters, grab the document
11229 */
11230 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
11231 return(XML_ERR_INTERNAL_ERROR);
11232 switch (node->type) {
11233 case XML_ELEMENT_NODE:
11234 case XML_ATTRIBUTE_NODE:
11235 case XML_TEXT_NODE:
11236 case XML_CDATA_SECTION_NODE:
11237 case XML_ENTITY_REF_NODE:
11238 case XML_PI_NODE:
11239 case XML_COMMENT_NODE:
11240 case XML_DOCUMENT_NODE:
11241 case XML_HTML_DOCUMENT_NODE:
11242 break;
11243 default:
11244 return(XML_ERR_INTERNAL_ERROR);
11245
11246 }
11247 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
11248 (node->type != XML_DOCUMENT_NODE) &&
11249 (node->type != XML_HTML_DOCUMENT_NODE))
11250 node = node->parent;
11251 if (node == NULL)
11252 return(XML_ERR_INTERNAL_ERROR);
11253 if (node->type == XML_ELEMENT_NODE)
11254 doc = node->doc;
11255 else
11256 doc = (xmlDocPtr) node;
11257 if (doc == NULL)
11258 return(XML_ERR_INTERNAL_ERROR);
11259
11260 /*
11261 * allocate a context and set-up everything not related to the
11262 * node position in the tree
11263 */
11264 if (doc->type == XML_DOCUMENT_NODE)
11265 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
11266#ifdef LIBXML_HTML_ENABLED
11267 else if (doc->type == XML_HTML_DOCUMENT_NODE)
11268 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
11269#endif
11270 else
11271 return(XML_ERR_INTERNAL_ERROR);
11272
11273 if (ctxt == NULL)
11274 return(XML_ERR_NO_MEMORY);
11275 fake = xmlNewComment(NULL);
11276 if (fake == NULL) {
11277 xmlFreeParserCtxt(ctxt);
11278 return(XML_ERR_NO_MEMORY);
11279 }
11280 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000011281
11282 /*
11283 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
11284 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
11285 * we must wait until the last moment to free the original one.
11286 */
Daniel Veillard29b17482004-08-16 00:39:03 +000011287 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000011288 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000011289 xmlDictFree(ctxt->dict);
11290 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000011291 } else
11292 options |= XML_PARSE_NODICT;
11293
11294 xmlCtxtUseOptions(ctxt, options);
Daniel Veillard29b17482004-08-16 00:39:03 +000011295 xmlDetectSAX2(ctxt);
11296 ctxt->myDoc = doc;
11297
11298 if (node->type == XML_ELEMENT_NODE) {
11299 nodePush(ctxt, node);
11300 /*
11301 * initialize the SAX2 namespaces stack
11302 */
11303 cur = node;
11304 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
11305 xmlNsPtr ns = cur->nsDef;
11306 const xmlChar *iprefix, *ihref;
11307
11308 while (ns != NULL) {
11309 if (ctxt->dict) {
11310 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
11311 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
11312 } else {
11313 iprefix = ns->prefix;
11314 ihref = ns->href;
11315 }
11316
11317 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
11318 nsPush(ctxt, iprefix, ihref);
11319 nsnr++;
11320 }
11321 ns = ns->next;
11322 }
11323 cur = cur->parent;
11324 }
11325 ctxt->instate = XML_PARSER_CONTENT;
11326 }
11327
11328 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
11329 /*
11330 * ID/IDREF registration will be done in xmlValidateElement below
11331 */
11332 ctxt->loadsubset |= XML_SKIP_IDS;
11333 }
11334
11335 xmlParseContent(ctxt);
11336 nsPop(ctxt, nsnr);
11337 if ((RAW == '<') && (NXT(1) == '/')) {
11338 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11339 } else if (RAW != 0) {
11340 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11341 }
11342 if ((ctxt->node != NULL) && (ctxt->node != node)) {
11343 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11344 ctxt->wellFormed = 0;
11345 }
11346
11347 if (!ctxt->wellFormed) {
11348 if (ctxt->errNo == 0)
11349 ret = XML_ERR_INTERNAL_ERROR;
11350 else
11351 ret = (xmlParserErrors)ctxt->errNo;
11352 } else {
11353 ret = XML_ERR_OK;
11354 }
11355
11356 /*
11357 * Return the newly created nodeset after unlinking it from
11358 * the pseudo sibling.
11359 */
11360
11361 cur = fake->next;
11362 fake->next = NULL;
11363 node->last = fake;
11364
11365 if (cur != NULL) {
11366 cur->prev = NULL;
11367 }
11368
11369 *lst = cur;
11370
11371 while (cur != NULL) {
11372 cur->parent = NULL;
11373 cur = cur->next;
11374 }
11375
11376 xmlUnlinkNode(fake);
11377 xmlFreeNode(fake);
11378
11379
11380 if (ret != XML_ERR_OK) {
11381 xmlFreeNodeList(*lst);
11382 *lst = NULL;
11383 }
William M. Brackc3f81342004-10-03 01:22:44 +000011384
William M. Brackb7b54de2004-10-06 16:38:01 +000011385 if (doc->dict != NULL)
11386 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000011387 xmlFreeParserCtxt(ctxt);
11388
11389 return(ret);
11390#else /* !SAX2 */
11391 return(XML_ERR_INTERNAL_ERROR);
11392#endif
11393}
11394
Daniel Veillard81273902003-09-30 00:43:48 +000011395#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000011396/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000011397 * xmlParseBalancedChunkMemoryRecover:
11398 * @doc: the document the chunk pertains to
11399 * @sax: the SAX handler bloc (possibly NULL)
11400 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11401 * @depth: Used for loop detection, use 0
11402 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11403 * @lst: the return value for the set of parsed nodes
11404 * @recover: return nodes even if the data is broken (use 0)
11405 *
11406 *
11407 * Parse a well-balanced chunk of an XML document
11408 * called by the parser
11409 * The allowed sequence for the Well Balanced Chunk is the one defined by
11410 * the content production in the XML grammar:
11411 *
11412 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11413 *
11414 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11415 * the parser error code otherwise
11416 *
11417 * In case recover is set to 1, the nodelist will not be empty even if
11418 * the parsed chunk is not well balanced.
11419 */
11420int
11421xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11422 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
11423 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000011424 xmlParserCtxtPtr ctxt;
11425 xmlDocPtr newDoc;
11426 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011427 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011428 int size;
11429 int ret = 0;
11430
11431 if (depth > 40) {
11432 return(XML_ERR_ENTITY_LOOP);
11433 }
11434
11435
Daniel Veillardcda96922001-08-21 10:56:31 +000011436 if (lst != NULL)
11437 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011438 if (string == NULL)
11439 return(-1);
11440
11441 size = xmlStrlen(string);
11442
11443 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11444 if (ctxt == NULL) return(-1);
11445 ctxt->userData = ctxt;
11446 if (sax != NULL) {
11447 oldsax = ctxt->sax;
11448 ctxt->sax = sax;
11449 if (user_data != NULL)
11450 ctxt->userData = user_data;
11451 }
11452 newDoc = xmlNewDoc(BAD_CAST "1.0");
11453 if (newDoc == NULL) {
11454 xmlFreeParserCtxt(ctxt);
11455 return(-1);
11456 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011457 if ((doc != NULL) && (doc->dict != NULL)) {
11458 xmlDictFree(ctxt->dict);
11459 ctxt->dict = doc->dict;
11460 xmlDictReference(ctxt->dict);
11461 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
11462 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
11463 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
11464 ctxt->dictNames = 1;
11465 } else {
11466 xmlCtxtUseOptions(ctxt, XML_PARSE_NODICT);
11467 }
Owen Taylor3473f882001-02-23 17:55:21 +000011468 if (doc != NULL) {
11469 newDoc->intSubset = doc->intSubset;
11470 newDoc->extSubset = doc->extSubset;
11471 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011472 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11473 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011474 if (sax != NULL)
11475 ctxt->sax = oldsax;
11476 xmlFreeParserCtxt(ctxt);
11477 newDoc->intSubset = NULL;
11478 newDoc->extSubset = NULL;
11479 xmlFreeDoc(newDoc);
11480 return(-1);
11481 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011482 xmlAddChild((xmlNodePtr) newDoc, newRoot);
11483 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011484 if (doc == NULL) {
11485 ctxt->myDoc = newDoc;
11486 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000011487 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000011488 newDoc->children->doc = doc;
11489 }
11490 ctxt->instate = XML_PARSER_CONTENT;
11491 ctxt->depth = depth;
11492
11493 /*
11494 * Doing validity checking on chunk doesn't make sense
11495 */
11496 ctxt->validate = 0;
11497 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011498 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011499
Daniel Veillardb39bc392002-10-26 19:29:51 +000011500 if ( doc != NULL ){
11501 content = doc->children;
11502 doc->children = NULL;
11503 xmlParseContent(ctxt);
11504 doc->children = content;
11505 }
11506 else {
11507 xmlParseContent(ctxt);
11508 }
Owen Taylor3473f882001-02-23 17:55:21 +000011509 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011510 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011511 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011512 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011513 }
11514 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011515 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011516 }
11517
11518 if (!ctxt->wellFormed) {
11519 if (ctxt->errNo == 0)
11520 ret = 1;
11521 else
11522 ret = ctxt->errNo;
11523 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000011524 ret = 0;
11525 }
11526
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011527 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
11528 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000011529
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011530 /*
11531 * Return the newly created nodeset after unlinking it from
11532 * they pseudo parent.
11533 */
11534 cur = newDoc->children->children;
11535 *lst = cur;
11536 while (cur != NULL) {
11537 xmlSetTreeDoc(cur, doc);
11538 cur->parent = NULL;
11539 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000011540 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000011541 newDoc->children->children = NULL;
11542 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000011543
Owen Taylor3473f882001-02-23 17:55:21 +000011544 if (sax != NULL)
11545 ctxt->sax = oldsax;
11546 xmlFreeParserCtxt(ctxt);
11547 newDoc->intSubset = NULL;
11548 newDoc->extSubset = NULL;
11549 xmlFreeDoc(newDoc);
11550
11551 return(ret);
11552}
11553
11554/**
11555 * xmlSAXParseEntity:
11556 * @sax: the SAX handler block
11557 * @filename: the filename
11558 *
11559 * parse an XML external entity out of context and build a tree.
11560 * It use the given SAX function block to handle the parsing callback.
11561 * If sax is NULL, fallback to the default DOM tree building routines.
11562 *
11563 * [78] extParsedEnt ::= TextDecl? content
11564 *
11565 * This correspond to a "Well Balanced" chunk
11566 *
11567 * Returns the resulting document tree
11568 */
11569
11570xmlDocPtr
11571xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
11572 xmlDocPtr ret;
11573 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011574
11575 ctxt = xmlCreateFileParserCtxt(filename);
11576 if (ctxt == NULL) {
11577 return(NULL);
11578 }
11579 if (sax != NULL) {
11580 if (ctxt->sax != NULL)
11581 xmlFree(ctxt->sax);
11582 ctxt->sax = sax;
11583 ctxt->userData = NULL;
11584 }
11585
Owen Taylor3473f882001-02-23 17:55:21 +000011586 xmlParseExtParsedEnt(ctxt);
11587
11588 if (ctxt->wellFormed)
11589 ret = ctxt->myDoc;
11590 else {
11591 ret = NULL;
11592 xmlFreeDoc(ctxt->myDoc);
11593 ctxt->myDoc = NULL;
11594 }
11595 if (sax != NULL)
11596 ctxt->sax = NULL;
11597 xmlFreeParserCtxt(ctxt);
11598
11599 return(ret);
11600}
11601
11602/**
11603 * xmlParseEntity:
11604 * @filename: the filename
11605 *
11606 * parse an XML external entity out of context and build a tree.
11607 *
11608 * [78] extParsedEnt ::= TextDecl? content
11609 *
11610 * This correspond to a "Well Balanced" chunk
11611 *
11612 * Returns the resulting document tree
11613 */
11614
11615xmlDocPtr
11616xmlParseEntity(const char *filename) {
11617 return(xmlSAXParseEntity(NULL, filename));
11618}
Daniel Veillard81273902003-09-30 00:43:48 +000011619#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011620
11621/**
11622 * xmlCreateEntityParserCtxt:
11623 * @URL: the entity URL
11624 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011625 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000011626 *
11627 * Create a parser context for an external entity
11628 * Automatic support for ZLIB/Compress compressed document is provided
11629 * by default if found at compile-time.
11630 *
11631 * Returns the new parser context or NULL
11632 */
11633xmlParserCtxtPtr
11634xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11635 const xmlChar *base) {
11636 xmlParserCtxtPtr ctxt;
11637 xmlParserInputPtr inputStream;
11638 char *directory = NULL;
11639 xmlChar *uri;
11640
11641 ctxt = xmlNewParserCtxt();
11642 if (ctxt == NULL) {
11643 return(NULL);
11644 }
11645
11646 uri = xmlBuildURI(URL, base);
11647
11648 if (uri == NULL) {
11649 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11650 if (inputStream == NULL) {
11651 xmlFreeParserCtxt(ctxt);
11652 return(NULL);
11653 }
11654
11655 inputPush(ctxt, inputStream);
11656
11657 if ((ctxt->directory == NULL) && (directory == NULL))
11658 directory = xmlParserGetDirectory((char *)URL);
11659 if ((ctxt->directory == NULL) && (directory != NULL))
11660 ctxt->directory = directory;
11661 } else {
11662 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11663 if (inputStream == NULL) {
11664 xmlFree(uri);
11665 xmlFreeParserCtxt(ctxt);
11666 return(NULL);
11667 }
11668
11669 inputPush(ctxt, inputStream);
11670
11671 if ((ctxt->directory == NULL) && (directory == NULL))
11672 directory = xmlParserGetDirectory((char *)uri);
11673 if ((ctxt->directory == NULL) && (directory != NULL))
11674 ctxt->directory = directory;
11675 xmlFree(uri);
11676 }
Owen Taylor3473f882001-02-23 17:55:21 +000011677 return(ctxt);
11678}
11679
11680/************************************************************************
11681 * *
11682 * Front ends when parsing from a file *
11683 * *
11684 ************************************************************************/
11685
11686/**
Daniel Veillard61b93382003-11-03 14:28:31 +000011687 * xmlCreateURLParserCtxt:
11688 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000011689 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000011690 *
Daniel Veillard61b93382003-11-03 14:28:31 +000011691 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000011692 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000011693 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000011694 *
11695 * Returns the new parser context or NULL
11696 */
11697xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000011698xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000011699{
11700 xmlParserCtxtPtr ctxt;
11701 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000011702 char *directory = NULL;
11703
Owen Taylor3473f882001-02-23 17:55:21 +000011704 ctxt = xmlNewParserCtxt();
11705 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011706 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000011707 return(NULL);
11708 }
11709
Daniel Veillarddf292f72005-01-16 19:00:15 +000011710 if (options)
11711 xmlCtxtUseOptions(ctxt, options);
11712 ctxt->linenumbers = 1;
Igor Zlatkovicce076162003-02-23 13:39:39 +000011713
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000011714 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011715 if (inputStream == NULL) {
11716 xmlFreeParserCtxt(ctxt);
11717 return(NULL);
11718 }
11719
Owen Taylor3473f882001-02-23 17:55:21 +000011720 inputPush(ctxt, inputStream);
11721 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011722 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011723 if ((ctxt->directory == NULL) && (directory != NULL))
11724 ctxt->directory = directory;
11725
11726 return(ctxt);
11727}
11728
Daniel Veillard61b93382003-11-03 14:28:31 +000011729/**
11730 * xmlCreateFileParserCtxt:
11731 * @filename: the filename
11732 *
11733 * Create a parser context for a file content.
11734 * Automatic support for ZLIB/Compress compressed document is provided
11735 * by default if found at compile-time.
11736 *
11737 * Returns the new parser context or NULL
11738 */
11739xmlParserCtxtPtr
11740xmlCreateFileParserCtxt(const char *filename)
11741{
11742 return(xmlCreateURLParserCtxt(filename, 0));
11743}
11744
Daniel Veillard81273902003-09-30 00:43:48 +000011745#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011746/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011747 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000011748 * @sax: the SAX handler block
11749 * @filename: the filename
11750 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11751 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000011752 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000011753 *
11754 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11755 * compressed document is provided by default if found at compile-time.
11756 * It use the given SAX function block to handle the parsing callback.
11757 * If sax is NULL, fallback to the default DOM tree building routines.
11758 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000011759 * User data (void *) is stored within the parser context in the
11760 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000011761 *
Owen Taylor3473f882001-02-23 17:55:21 +000011762 * Returns the resulting document tree
11763 */
11764
11765xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000011766xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
11767 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000011768 xmlDocPtr ret;
11769 xmlParserCtxtPtr ctxt;
11770 char *directory = NULL;
11771
Daniel Veillard635ef722001-10-29 11:48:19 +000011772 xmlInitParser();
11773
Owen Taylor3473f882001-02-23 17:55:21 +000011774 ctxt = xmlCreateFileParserCtxt(filename);
11775 if (ctxt == NULL) {
11776 return(NULL);
11777 }
11778 if (sax != NULL) {
11779 if (ctxt->sax != NULL)
11780 xmlFree(ctxt->sax);
11781 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000011782 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011783 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000011784 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000011785 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000011786 }
Owen Taylor3473f882001-02-23 17:55:21 +000011787
11788 if ((ctxt->directory == NULL) && (directory == NULL))
11789 directory = xmlParserGetDirectory(filename);
11790 if ((ctxt->directory == NULL) && (directory != NULL))
11791 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
11792
Daniel Veillarddad3f682002-11-17 16:47:27 +000011793 ctxt->recovery = recovery;
11794
Owen Taylor3473f882001-02-23 17:55:21 +000011795 xmlParseDocument(ctxt);
11796
William M. Brackc07329e2003-09-08 01:57:30 +000011797 if ((ctxt->wellFormed) || recovery) {
11798 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000011799 if (ret != NULL) {
11800 if (ctxt->input->buf->compressed > 0)
11801 ret->compression = 9;
11802 else
11803 ret->compression = ctxt->input->buf->compressed;
11804 }
William M. Brackc07329e2003-09-08 01:57:30 +000011805 }
Owen Taylor3473f882001-02-23 17:55:21 +000011806 else {
11807 ret = NULL;
11808 xmlFreeDoc(ctxt->myDoc);
11809 ctxt->myDoc = NULL;
11810 }
11811 if (sax != NULL)
11812 ctxt->sax = NULL;
11813 xmlFreeParserCtxt(ctxt);
11814
11815 return(ret);
11816}
11817
11818/**
Daniel Veillarda293c322001-10-02 13:54:14 +000011819 * xmlSAXParseFile:
11820 * @sax: the SAX handler block
11821 * @filename: the filename
11822 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11823 * documents
11824 *
11825 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11826 * compressed document is provided by default if found at compile-time.
11827 * It use the given SAX function block to handle the parsing callback.
11828 * If sax is NULL, fallback to the default DOM tree building routines.
11829 *
11830 * Returns the resulting document tree
11831 */
11832
11833xmlDocPtr
11834xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
11835 int recovery) {
11836 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
11837}
11838
11839/**
Owen Taylor3473f882001-02-23 17:55:21 +000011840 * xmlRecoverDoc:
11841 * @cur: a pointer to an array of xmlChar
11842 *
11843 * parse an XML in-memory document and build a tree.
11844 * In the case the document is not Well Formed, a tree is built anyway
11845 *
11846 * Returns the resulting document tree
11847 */
11848
11849xmlDocPtr
11850xmlRecoverDoc(xmlChar *cur) {
11851 return(xmlSAXParseDoc(NULL, cur, 1));
11852}
11853
11854/**
11855 * xmlParseFile:
11856 * @filename: the filename
11857 *
11858 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11859 * compressed document is provided by default if found at compile-time.
11860 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000011861 * Returns the resulting document tree if the file was wellformed,
11862 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000011863 */
11864
11865xmlDocPtr
11866xmlParseFile(const char *filename) {
11867 return(xmlSAXParseFile(NULL, filename, 0));
11868}
11869
11870/**
11871 * xmlRecoverFile:
11872 * @filename: the filename
11873 *
11874 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11875 * compressed document is provided by default if found at compile-time.
11876 * In the case the document is not Well Formed, a tree is built anyway
11877 *
11878 * Returns the resulting document tree
11879 */
11880
11881xmlDocPtr
11882xmlRecoverFile(const char *filename) {
11883 return(xmlSAXParseFile(NULL, filename, 1));
11884}
11885
11886
11887/**
11888 * xmlSetupParserForBuffer:
11889 * @ctxt: an XML parser context
11890 * @buffer: a xmlChar * buffer
11891 * @filename: a file name
11892 *
11893 * Setup the parser context to parse a new buffer; Clears any prior
11894 * contents from the parser context. The buffer parameter must not be
11895 * NULL, but the filename parameter can be
11896 */
11897void
11898xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
11899 const char* filename)
11900{
11901 xmlParserInputPtr input;
11902
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011903 if ((ctxt == NULL) || (buffer == NULL))
11904 return;
11905
Owen Taylor3473f882001-02-23 17:55:21 +000011906 input = xmlNewInputStream(ctxt);
11907 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011908 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011909 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011910 return;
11911 }
11912
11913 xmlClearParserCtxt(ctxt);
11914 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000011915 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000011916 input->base = buffer;
11917 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011918 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000011919 inputPush(ctxt, input);
11920}
11921
11922/**
11923 * xmlSAXUserParseFile:
11924 * @sax: a SAX handler
11925 * @user_data: The user data returned on SAX callbacks
11926 * @filename: a file name
11927 *
11928 * parse an XML file and call the given SAX handler routines.
11929 * Automatic support for ZLIB/Compress compressed document is provided
11930 *
11931 * Returns 0 in case of success or a error number otherwise
11932 */
11933int
11934xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11935 const char *filename) {
11936 int ret = 0;
11937 xmlParserCtxtPtr ctxt;
11938
11939 ctxt = xmlCreateFileParserCtxt(filename);
11940 if (ctxt == NULL) return -1;
Daniel Veillard81273902003-09-30 00:43:48 +000011941#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011942 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011943#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011944 xmlFree(ctxt->sax);
11945 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011946 xmlDetectSAX2(ctxt);
11947
Owen Taylor3473f882001-02-23 17:55:21 +000011948 if (user_data != NULL)
11949 ctxt->userData = user_data;
11950
11951 xmlParseDocument(ctxt);
11952
11953 if (ctxt->wellFormed)
11954 ret = 0;
11955 else {
11956 if (ctxt->errNo != 0)
11957 ret = ctxt->errNo;
11958 else
11959 ret = -1;
11960 }
11961 if (sax != NULL)
11962 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000011963 if (ctxt->myDoc != NULL) {
11964 xmlFreeDoc(ctxt->myDoc);
11965 ctxt->myDoc = NULL;
11966 }
Owen Taylor3473f882001-02-23 17:55:21 +000011967 xmlFreeParserCtxt(ctxt);
11968
11969 return ret;
11970}
Daniel Veillard81273902003-09-30 00:43:48 +000011971#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011972
11973/************************************************************************
11974 * *
11975 * Front ends when parsing from memory *
11976 * *
11977 ************************************************************************/
11978
11979/**
11980 * xmlCreateMemoryParserCtxt:
11981 * @buffer: a pointer to a char array
11982 * @size: the size of the array
11983 *
11984 * Create a parser context for an XML in-memory document.
11985 *
11986 * Returns the new parser context or NULL
11987 */
11988xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000011989xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000011990 xmlParserCtxtPtr ctxt;
11991 xmlParserInputPtr input;
11992 xmlParserInputBufferPtr buf;
11993
11994 if (buffer == NULL)
11995 return(NULL);
11996 if (size <= 0)
11997 return(NULL);
11998
11999 ctxt = xmlNewParserCtxt();
12000 if (ctxt == NULL)
12001 return(NULL);
12002
Daniel Veillard53350552003-09-18 13:35:51 +000012003 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000012004 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012005 if (buf == NULL) {
12006 xmlFreeParserCtxt(ctxt);
12007 return(NULL);
12008 }
Owen Taylor3473f882001-02-23 17:55:21 +000012009
12010 input = xmlNewInputStream(ctxt);
12011 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000012012 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012013 xmlFreeParserCtxt(ctxt);
12014 return(NULL);
12015 }
12016
12017 input->filename = NULL;
12018 input->buf = buf;
12019 input->base = input->buf->buffer->content;
12020 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000012021 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000012022
12023 inputPush(ctxt, input);
12024 return(ctxt);
12025}
12026
Daniel Veillard81273902003-09-30 00:43:48 +000012027#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012028/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012029 * xmlSAXParseMemoryWithData:
12030 * @sax: the SAX handler block
12031 * @buffer: an pointer to a char array
12032 * @size: the size of the array
12033 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12034 * documents
12035 * @data: the userdata
12036 *
12037 * parse an XML in-memory block and use the given SAX function block
12038 * to handle the parsing callback. If sax is NULL, fallback to the default
12039 * DOM tree building routines.
12040 *
12041 * User data (void *) is stored within the parser context in the
12042 * context's _private member, so it is available nearly everywhere in libxml
12043 *
12044 * Returns the resulting document tree
12045 */
12046
12047xmlDocPtr
12048xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12049 int size, int recovery, void *data) {
12050 xmlDocPtr ret;
12051 xmlParserCtxtPtr ctxt;
12052
12053 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12054 if (ctxt == NULL) return(NULL);
12055 if (sax != NULL) {
12056 if (ctxt->sax != NULL)
12057 xmlFree(ctxt->sax);
12058 ctxt->sax = sax;
12059 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012060 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012061 if (data!=NULL) {
12062 ctxt->_private=data;
12063 }
12064
Daniel Veillardadba5f12003-04-04 16:09:01 +000012065 ctxt->recovery = recovery;
12066
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012067 xmlParseDocument(ctxt);
12068
12069 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12070 else {
12071 ret = NULL;
12072 xmlFreeDoc(ctxt->myDoc);
12073 ctxt->myDoc = NULL;
12074 }
12075 if (sax != NULL)
12076 ctxt->sax = NULL;
12077 xmlFreeParserCtxt(ctxt);
12078
12079 return(ret);
12080}
12081
12082/**
Owen Taylor3473f882001-02-23 17:55:21 +000012083 * xmlSAXParseMemory:
12084 * @sax: the SAX handler block
12085 * @buffer: an pointer to a char array
12086 * @size: the size of the array
12087 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12088 * documents
12089 *
12090 * parse an XML in-memory block and use the given SAX function block
12091 * to handle the parsing callback. If sax is NULL, fallback to the default
12092 * DOM tree building routines.
12093 *
12094 * Returns the resulting document tree
12095 */
12096xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000012097xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
12098 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000012099 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012100}
12101
12102/**
12103 * xmlParseMemory:
12104 * @buffer: an pointer to a char array
12105 * @size: the size of the array
12106 *
12107 * parse an XML in-memory block and build a tree.
12108 *
12109 * Returns the resulting document tree
12110 */
12111
Daniel Veillard50822cb2001-07-26 20:05:51 +000012112xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012113 return(xmlSAXParseMemory(NULL, buffer, size, 0));
12114}
12115
12116/**
12117 * xmlRecoverMemory:
12118 * @buffer: an pointer to a char array
12119 * @size: the size of the array
12120 *
12121 * parse an XML in-memory block and build a tree.
12122 * In the case the document is not Well Formed, a tree is built anyway
12123 *
12124 * Returns the resulting document tree
12125 */
12126
Daniel Veillard50822cb2001-07-26 20:05:51 +000012127xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012128 return(xmlSAXParseMemory(NULL, buffer, size, 1));
12129}
12130
12131/**
12132 * xmlSAXUserParseMemory:
12133 * @sax: a SAX handler
12134 * @user_data: The user data returned on SAX callbacks
12135 * @buffer: an in-memory XML document input
12136 * @size: the length of the XML document in bytes
12137 *
12138 * A better SAX parsing routine.
12139 * parse an XML in-memory buffer and call the given SAX handler routines.
12140 *
12141 * Returns 0 in case of success or a error number otherwise
12142 */
12143int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000012144 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000012145 int ret = 0;
12146 xmlParserCtxtPtr ctxt;
12147 xmlSAXHandlerPtr oldsax = NULL;
12148
Daniel Veillard9e923512002-08-14 08:48:52 +000012149 if (sax == NULL) return -1;
Owen Taylor3473f882001-02-23 17:55:21 +000012150 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12151 if (ctxt == NULL) return -1;
Daniel Veillard9e923512002-08-14 08:48:52 +000012152 oldsax = ctxt->sax;
12153 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012154 xmlDetectSAX2(ctxt);
Daniel Veillard30211a02001-04-26 09:33:18 +000012155 if (user_data != NULL)
12156 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000012157
12158 xmlParseDocument(ctxt);
12159
12160 if (ctxt->wellFormed)
12161 ret = 0;
12162 else {
12163 if (ctxt->errNo != 0)
12164 ret = ctxt->errNo;
12165 else
12166 ret = -1;
12167 }
Daniel Veillard9e923512002-08-14 08:48:52 +000012168 ctxt->sax = oldsax;
Daniel Veillard34099b42004-11-04 17:34:35 +000012169 if (ctxt->myDoc != NULL) {
12170 xmlFreeDoc(ctxt->myDoc);
12171 ctxt->myDoc = NULL;
12172 }
Owen Taylor3473f882001-02-23 17:55:21 +000012173 xmlFreeParserCtxt(ctxt);
12174
12175 return ret;
12176}
Daniel Veillard81273902003-09-30 00:43:48 +000012177#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012178
12179/**
12180 * xmlCreateDocParserCtxt:
12181 * @cur: a pointer to an array of xmlChar
12182 *
12183 * Creates a parser context for an XML in-memory document.
12184 *
12185 * Returns the new parser context or NULL
12186 */
12187xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012188xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000012189 int len;
12190
12191 if (cur == NULL)
12192 return(NULL);
12193 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012194 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000012195}
12196
Daniel Veillard81273902003-09-30 00:43:48 +000012197#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012198/**
12199 * xmlSAXParseDoc:
12200 * @sax: the SAX handler block
12201 * @cur: a pointer to an array of xmlChar
12202 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12203 * documents
12204 *
12205 * parse an XML in-memory document and build a tree.
12206 * It use the given SAX function block to handle the parsing callback.
12207 * If sax is NULL, fallback to the default DOM tree building routines.
12208 *
12209 * Returns the resulting document tree
12210 */
12211
12212xmlDocPtr
12213xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
12214 xmlDocPtr ret;
12215 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000012216 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012217
Daniel Veillard38936062004-11-04 17:45:11 +000012218 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012219
12220
12221 ctxt = xmlCreateDocParserCtxt(cur);
12222 if (ctxt == NULL) return(NULL);
12223 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000012224 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000012225 ctxt->sax = sax;
12226 ctxt->userData = NULL;
12227 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012228 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012229
12230 xmlParseDocument(ctxt);
12231 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
12232 else {
12233 ret = NULL;
12234 xmlFreeDoc(ctxt->myDoc);
12235 ctxt->myDoc = NULL;
12236 }
Daniel Veillard34099b42004-11-04 17:34:35 +000012237 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000012238 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000012239 xmlFreeParserCtxt(ctxt);
12240
12241 return(ret);
12242}
12243
12244/**
12245 * xmlParseDoc:
12246 * @cur: a pointer to an array of xmlChar
12247 *
12248 * parse an XML in-memory document and build a tree.
12249 *
12250 * Returns the resulting document tree
12251 */
12252
12253xmlDocPtr
12254xmlParseDoc(xmlChar *cur) {
12255 return(xmlSAXParseDoc(NULL, cur, 0));
12256}
Daniel Veillard81273902003-09-30 00:43:48 +000012257#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012258
Daniel Veillard81273902003-09-30 00:43:48 +000012259#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000012260/************************************************************************
12261 * *
12262 * Specific function to keep track of entities references *
12263 * and used by the XSLT debugger *
12264 * *
12265 ************************************************************************/
12266
12267static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
12268
12269/**
12270 * xmlAddEntityReference:
12271 * @ent : A valid entity
12272 * @firstNode : A valid first node for children of entity
12273 * @lastNode : A valid last node of children entity
12274 *
12275 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
12276 */
12277static void
12278xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
12279 xmlNodePtr lastNode)
12280{
12281 if (xmlEntityRefFunc != NULL) {
12282 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
12283 }
12284}
12285
12286
12287/**
12288 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000012289 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000012290 *
12291 * Set the function to call call back when a xml reference has been made
12292 */
12293void
12294xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
12295{
12296 xmlEntityRefFunc = func;
12297}
Daniel Veillard81273902003-09-30 00:43:48 +000012298#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012299
12300/************************************************************************
12301 * *
12302 * Miscellaneous *
12303 * *
12304 ************************************************************************/
12305
12306#ifdef LIBXML_XPATH_ENABLED
12307#include <libxml/xpath.h>
12308#endif
12309
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012310extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000012311static int xmlParserInitialized = 0;
12312
12313/**
12314 * xmlInitParser:
12315 *
12316 * Initialization function for the XML parser.
12317 * This is not reentrant. Call once before processing in case of
12318 * use in multithreaded programs.
12319 */
12320
12321void
12322xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000012323 if (xmlParserInitialized != 0)
12324 return;
Owen Taylor3473f882001-02-23 17:55:21 +000012325
Daniel Veillarddb5850a2002-01-18 11:49:26 +000012326 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
12327 (xmlGenericError == NULL))
12328 initGenericErrorDefaultFunc(NULL);
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012329 xmlInitGlobals();
Daniel Veillardd0463562001-10-13 09:15:48 +000012330 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000012331 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000012332 xmlInitCharEncodingHandlers();
Owen Taylor3473f882001-02-23 17:55:21 +000012333 xmlDefaultSAXHandlerInit();
12334 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012335#ifdef LIBXML_OUTPUT_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012336 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000012337#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012338#ifdef LIBXML_HTML_ENABLED
12339 htmlInitAutoClose();
12340 htmlDefaultSAXHandlerInit();
12341#endif
12342#ifdef LIBXML_XPATH_ENABLED
12343 xmlXPathInit();
12344#endif
12345 xmlParserInitialized = 1;
12346}
12347
12348/**
12349 * xmlCleanupParser:
12350 *
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012351 * Cleanup function for the XML library. It tries to reclaim all
12352 * parsing related global memory allocated for the library processing.
Owen Taylor3473f882001-02-23 17:55:21 +000012353 * It doesn't deallocate any document related memory. Calling this
Daniel Veillardd8cf9062003-11-11 21:12:36 +000012354 * function should not prevent reusing the library but one should
12355 * call xmlCleanupParser() only when the process has
Daniel Veillard7424eb62003-01-24 14:14:52 +000012356 * finished using the library or XML document built with it.
Owen Taylor3473f882001-02-23 17:55:21 +000012357 */
12358
12359void
12360xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000012361 if (!xmlParserInitialized)
12362 return;
12363
Owen Taylor3473f882001-02-23 17:55:21 +000012364 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000012365#ifdef LIBXML_CATALOG_ENABLED
12366 xmlCatalogCleanup();
12367#endif
Daniel Veillard14412512005-01-21 23:53:26 +000012368 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000012369 xmlCleanupInputCallbacks();
12370#ifdef LIBXML_OUTPUT_ENABLED
12371 xmlCleanupOutputCallbacks();
12372#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012373#ifdef LIBXML_SCHEMAS_ENABLED
12374 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000012375 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012376#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000012377 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000012378 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000012379 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000012380 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000012381 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012382}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012383
12384/************************************************************************
12385 * *
12386 * New set (2.6.0) of simpler and more flexible APIs *
12387 * *
12388 ************************************************************************/
12389
12390/**
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012391 * DICT_FREE:
12392 * @str: a string
12393 *
12394 * Free a string if it is not owned by the "dict" dictionnary in the
12395 * current scope
12396 */
12397#define DICT_FREE(str) \
12398 if ((str) && ((!dict) || \
12399 (xmlDictOwns(dict, (const xmlChar *)(str)) == 0))) \
12400 xmlFree((char *)(str));
12401
12402/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012403 * xmlCtxtReset:
12404 * @ctxt: an XML parser context
12405 *
12406 * Reset a parser context
12407 */
12408void
12409xmlCtxtReset(xmlParserCtxtPtr ctxt)
12410{
12411 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012412 xmlDictPtr dict;
12413
12414 if (ctxt == NULL)
12415 return;
12416
12417 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012418
12419 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
12420 xmlFreeInputStream(input);
12421 }
12422 ctxt->inputNr = 0;
12423 ctxt->input = NULL;
12424
12425 ctxt->spaceNr = 0;
12426 ctxt->spaceTab[0] = -1;
12427 ctxt->space = &ctxt->spaceTab[0];
12428
12429
12430 ctxt->nodeNr = 0;
12431 ctxt->node = NULL;
12432
12433 ctxt->nameNr = 0;
12434 ctxt->name = NULL;
12435
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012436 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012437 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012438 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012439 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012440 DICT_FREE(ctxt->directory);
12441 ctxt->directory = NULL;
12442 DICT_FREE(ctxt->extSubURI);
12443 ctxt->extSubURI = NULL;
12444 DICT_FREE(ctxt->extSubSystem);
12445 ctxt->extSubSystem = NULL;
12446 if (ctxt->myDoc != NULL)
12447 xmlFreeDoc(ctxt->myDoc);
12448 ctxt->myDoc = NULL;
12449
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012450 ctxt->standalone = -1;
12451 ctxt->hasExternalSubset = 0;
12452 ctxt->hasPErefs = 0;
12453 ctxt->html = 0;
12454 ctxt->external = 0;
12455 ctxt->instate = XML_PARSER_START;
12456 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012457
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012458 ctxt->wellFormed = 1;
12459 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000012460 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012461 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012462#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012463 ctxt->vctxt.userData = ctxt;
12464 ctxt->vctxt.error = xmlParserValidityError;
12465 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000012466#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012467 ctxt->record_info = 0;
12468 ctxt->nbChars = 0;
12469 ctxt->checkIndex = 0;
12470 ctxt->inSubset = 0;
12471 ctxt->errNo = XML_ERR_OK;
12472 ctxt->depth = 0;
12473 ctxt->charset = XML_CHAR_ENCODING_UTF8;
12474 ctxt->catalogs = NULL;
12475 xmlInitNodeInfoSeq(&ctxt->node_seq);
12476
12477 if (ctxt->attsDefault != NULL) {
12478 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
12479 ctxt->attsDefault = NULL;
12480 }
12481 if (ctxt->attsSpecial != NULL) {
12482 xmlHashFree(ctxt->attsSpecial, NULL);
12483 ctxt->attsSpecial = NULL;
12484 }
12485
Daniel Veillard4432df22003-09-28 18:58:27 +000012486#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012487 if (ctxt->catalogs != NULL)
12488 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000012489#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000012490 if (ctxt->lastError.code != XML_ERR_OK)
12491 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012492}
12493
12494/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012495 * xmlCtxtResetPush:
12496 * @ctxt: an XML parser context
12497 * @chunk: a pointer to an array of chars
12498 * @size: number of chars in the array
12499 * @filename: an optional file name or URI
12500 * @encoding: the document encoding, or NULL
12501 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012502 * Reset a push parser context
12503 *
12504 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012505 */
12506int
12507xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
12508 int size, const char *filename, const char *encoding)
12509{
12510 xmlParserInputPtr inputStream;
12511 xmlParserInputBufferPtr buf;
12512 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12513
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012514 if (ctxt == NULL)
12515 return(1);
12516
Daniel Veillard9ba8e382003-10-28 21:31:45 +000012517 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
12518 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12519
12520 buf = xmlAllocParserInputBuffer(enc);
12521 if (buf == NULL)
12522 return(1);
12523
12524 if (ctxt == NULL) {
12525 xmlFreeParserInputBuffer(buf);
12526 return(1);
12527 }
12528
12529 xmlCtxtReset(ctxt);
12530
12531 if (ctxt->pushTab == NULL) {
12532 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
12533 sizeof(xmlChar *));
12534 if (ctxt->pushTab == NULL) {
12535 xmlErrMemory(ctxt, NULL);
12536 xmlFreeParserInputBuffer(buf);
12537 return(1);
12538 }
12539 }
12540
12541 if (filename == NULL) {
12542 ctxt->directory = NULL;
12543 } else {
12544 ctxt->directory = xmlParserGetDirectory(filename);
12545 }
12546
12547 inputStream = xmlNewInputStream(ctxt);
12548 if (inputStream == NULL) {
12549 xmlFreeParserInputBuffer(buf);
12550 return(1);
12551 }
12552
12553 if (filename == NULL)
12554 inputStream->filename = NULL;
12555 else
12556 inputStream->filename = (char *)
12557 xmlCanonicPath((const xmlChar *) filename);
12558 inputStream->buf = buf;
12559 inputStream->base = inputStream->buf->buffer->content;
12560 inputStream->cur = inputStream->buf->buffer->content;
12561 inputStream->end =
12562 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
12563
12564 inputPush(ctxt, inputStream);
12565
12566 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12567 (ctxt->input->buf != NULL)) {
12568 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
12569 int cur = ctxt->input->cur - ctxt->input->base;
12570
12571 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12572
12573 ctxt->input->base = ctxt->input->buf->buffer->content + base;
12574 ctxt->input->cur = ctxt->input->base + cur;
12575 ctxt->input->end =
12576 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
12577 use];
12578#ifdef DEBUG_PUSH
12579 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12580#endif
12581 }
12582
12583 if (encoding != NULL) {
12584 xmlCharEncodingHandlerPtr hdlr;
12585
12586 hdlr = xmlFindCharEncodingHandler(encoding);
12587 if (hdlr != NULL) {
12588 xmlSwitchToEncoding(ctxt, hdlr);
12589 } else {
12590 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
12591 "Unsupported encoding %s\n", BAD_CAST encoding);
12592 }
12593 } else if (enc != XML_CHAR_ENCODING_NONE) {
12594 xmlSwitchEncoding(ctxt, enc);
12595 }
12596
12597 return(0);
12598}
12599
12600/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012601 * xmlCtxtUseOptions:
12602 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012603 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012604 *
12605 * Applies the options to the parser context
12606 *
12607 * Returns 0 in case of success, the set of unknown or unimplemented options
12608 * in case of error.
12609 */
12610int
12611xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
12612{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012613 if (ctxt == NULL)
12614 return(-1);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012615 if (options & XML_PARSE_RECOVER) {
12616 ctxt->recovery = 1;
12617 options -= XML_PARSE_RECOVER;
12618 } else
12619 ctxt->recovery = 0;
12620 if (options & XML_PARSE_DTDLOAD) {
12621 ctxt->loadsubset = XML_DETECT_IDS;
12622 options -= XML_PARSE_DTDLOAD;
12623 } else
12624 ctxt->loadsubset = 0;
12625 if (options & XML_PARSE_DTDATTR) {
12626 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
12627 options -= XML_PARSE_DTDATTR;
12628 }
12629 if (options & XML_PARSE_NOENT) {
12630 ctxt->replaceEntities = 1;
12631 /* ctxt->loadsubset |= XML_DETECT_IDS; */
12632 options -= XML_PARSE_NOENT;
12633 } else
12634 ctxt->replaceEntities = 0;
12635 if (options & XML_PARSE_NOWARNING) {
12636 ctxt->sax->warning = NULL;
12637 options -= XML_PARSE_NOWARNING;
12638 }
12639 if (options & XML_PARSE_NOERROR) {
12640 ctxt->sax->error = NULL;
12641 ctxt->sax->fatalError = NULL;
12642 options -= XML_PARSE_NOERROR;
12643 }
12644 if (options & XML_PARSE_PEDANTIC) {
12645 ctxt->pedantic = 1;
12646 options -= XML_PARSE_PEDANTIC;
12647 } else
12648 ctxt->pedantic = 0;
12649 if (options & XML_PARSE_NOBLANKS) {
12650 ctxt->keepBlanks = 0;
12651 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
12652 options -= XML_PARSE_NOBLANKS;
12653 } else
12654 ctxt->keepBlanks = 1;
12655 if (options & XML_PARSE_DTDVALID) {
12656 ctxt->validate = 1;
12657 if (options & XML_PARSE_NOWARNING)
12658 ctxt->vctxt.warning = NULL;
12659 if (options & XML_PARSE_NOERROR)
12660 ctxt->vctxt.error = NULL;
12661 options -= XML_PARSE_DTDVALID;
12662 } else
12663 ctxt->validate = 0;
Daniel Veillard81273902003-09-30 00:43:48 +000012664#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012665 if (options & XML_PARSE_SAX1) {
12666 ctxt->sax->startElement = xmlSAX2StartElement;
12667 ctxt->sax->endElement = xmlSAX2EndElement;
12668 ctxt->sax->startElementNs = NULL;
12669 ctxt->sax->endElementNs = NULL;
12670 ctxt->sax->initialized = 1;
12671 options -= XML_PARSE_SAX1;
12672 }
Daniel Veillard81273902003-09-30 00:43:48 +000012673#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012674 if (options & XML_PARSE_NODICT) {
12675 ctxt->dictNames = 0;
12676 options -= XML_PARSE_NODICT;
12677 } else {
12678 ctxt->dictNames = 1;
12679 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000012680 if (options & XML_PARSE_NOCDATA) {
12681 ctxt->sax->cdataBlock = NULL;
12682 options -= XML_PARSE_NOCDATA;
12683 }
12684 if (options & XML_PARSE_NSCLEAN) {
12685 ctxt->options |= XML_PARSE_NSCLEAN;
12686 options -= XML_PARSE_NSCLEAN;
12687 }
Daniel Veillard61b93382003-11-03 14:28:31 +000012688 if (options & XML_PARSE_NONET) {
12689 ctxt->options |= XML_PARSE_NONET;
12690 options -= XML_PARSE_NONET;
12691 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000012692 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012693 return (options);
12694}
12695
12696/**
12697 * xmlDoRead:
12698 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000012699 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012700 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012701 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012702 * @reuse: keep the context for reuse
12703 *
12704 * Common front-end for the xmlRead functions
12705 *
12706 * Returns the resulting document tree or NULL
12707 */
12708static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012709xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
12710 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012711{
12712 xmlDocPtr ret;
12713
12714 xmlCtxtUseOptions(ctxt, options);
12715 if (encoding != NULL) {
12716 xmlCharEncodingHandlerPtr hdlr;
12717
12718 hdlr = xmlFindCharEncodingHandler(encoding);
12719 if (hdlr != NULL)
12720 xmlSwitchToEncoding(ctxt, hdlr);
12721 }
Daniel Veillard60942de2003-09-25 21:05:58 +000012722 if ((URL != NULL) && (ctxt->input != NULL) &&
12723 (ctxt->input->filename == NULL))
12724 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012725 xmlParseDocument(ctxt);
12726 if ((ctxt->wellFormed) || ctxt->recovery)
12727 ret = ctxt->myDoc;
12728 else {
12729 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012730 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012731 xmlFreeDoc(ctxt->myDoc);
12732 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012733 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012734 ctxt->myDoc = NULL;
12735 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012736 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000012737 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012738
12739 return (ret);
12740}
12741
12742/**
12743 * xmlReadDoc:
12744 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012745 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012746 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012747 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012748 *
12749 * parse an XML in-memory document and build a tree.
12750 *
12751 * Returns the resulting document tree
12752 */
12753xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012754xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012755{
12756 xmlParserCtxtPtr ctxt;
12757
12758 if (cur == NULL)
12759 return (NULL);
12760
12761 ctxt = xmlCreateDocParserCtxt(cur);
12762 if (ctxt == NULL)
12763 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012764 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012765}
12766
12767/**
12768 * xmlReadFile:
12769 * @filename: a file or URL
12770 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012771 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012772 *
12773 * parse an XML file from the filesystem or the network.
12774 *
12775 * Returns the resulting document tree
12776 */
12777xmlDocPtr
12778xmlReadFile(const char *filename, const char *encoding, int options)
12779{
12780 xmlParserCtxtPtr ctxt;
12781
Daniel Veillard61b93382003-11-03 14:28:31 +000012782 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012783 if (ctxt == NULL)
12784 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012785 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012786}
12787
12788/**
12789 * xmlReadMemory:
12790 * @buffer: a pointer to a char array
12791 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012792 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012793 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012794 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012795 *
12796 * parse an XML in-memory document and build a tree.
12797 *
12798 * Returns the resulting document tree
12799 */
12800xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012801xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012802{
12803 xmlParserCtxtPtr ctxt;
12804
12805 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
12806 if (ctxt == NULL)
12807 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000012808 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012809}
12810
12811/**
12812 * xmlReadFd:
12813 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000012814 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012815 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012816 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012817 *
12818 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012819 * NOTE that the file descriptor will not be closed when the
12820 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012821 *
12822 * Returns the resulting document tree
12823 */
12824xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000012825xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012826{
12827 xmlParserCtxtPtr ctxt;
12828 xmlParserInputBufferPtr input;
12829 xmlParserInputPtr stream;
12830
12831 if (fd < 0)
12832 return (NULL);
12833
12834 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
12835 if (input == NULL)
12836 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000012837 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012838 ctxt = xmlNewParserCtxt();
12839 if (ctxt == NULL) {
12840 xmlFreeParserInputBuffer(input);
12841 return (NULL);
12842 }
12843 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12844 if (stream == NULL) {
12845 xmlFreeParserInputBuffer(input);
12846 xmlFreeParserCtxt(ctxt);
12847 return (NULL);
12848 }
12849 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012850 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012851}
12852
12853/**
12854 * xmlReadIO:
12855 * @ioread: an I/O read function
12856 * @ioclose: an I/O close function
12857 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000012858 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012859 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012860 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012861 *
12862 * parse an XML document from I/O functions and source and build a tree.
12863 *
12864 * Returns the resulting document tree
12865 */
12866xmlDocPtr
12867xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000012868 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012869{
12870 xmlParserCtxtPtr ctxt;
12871 xmlParserInputBufferPtr input;
12872 xmlParserInputPtr stream;
12873
12874 if (ioread == NULL)
12875 return (NULL);
12876
12877 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
12878 XML_CHAR_ENCODING_NONE);
12879 if (input == NULL)
12880 return (NULL);
12881 ctxt = xmlNewParserCtxt();
12882 if (ctxt == NULL) {
12883 xmlFreeParserInputBuffer(input);
12884 return (NULL);
12885 }
12886 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12887 if (stream == NULL) {
12888 xmlFreeParserInputBuffer(input);
12889 xmlFreeParserCtxt(ctxt);
12890 return (NULL);
12891 }
12892 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012893 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012894}
12895
12896/**
12897 * xmlCtxtReadDoc:
12898 * @ctxt: an XML parser context
12899 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000012900 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012901 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012902 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012903 *
12904 * parse an XML in-memory document and build a tree.
12905 * This reuses the existing @ctxt parser context
12906 *
12907 * Returns the resulting document tree
12908 */
12909xmlDocPtr
12910xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000012911 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012912{
12913 xmlParserInputPtr stream;
12914
12915 if (cur == NULL)
12916 return (NULL);
12917 if (ctxt == NULL)
12918 return (NULL);
12919
12920 xmlCtxtReset(ctxt);
12921
12922 stream = xmlNewStringInputStream(ctxt, cur);
12923 if (stream == NULL) {
12924 return (NULL);
12925 }
12926 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012927 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012928}
12929
12930/**
12931 * xmlCtxtReadFile:
12932 * @ctxt: an XML parser context
12933 * @filename: a file or URL
12934 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012935 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012936 *
12937 * parse an XML file from the filesystem or the network.
12938 * This reuses the existing @ctxt parser context
12939 *
12940 * Returns the resulting document tree
12941 */
12942xmlDocPtr
12943xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
12944 const char *encoding, int options)
12945{
12946 xmlParserInputPtr stream;
12947
12948 if (filename == NULL)
12949 return (NULL);
12950 if (ctxt == NULL)
12951 return (NULL);
12952
12953 xmlCtxtReset(ctxt);
12954
Daniel Veillard29614c72004-11-26 10:47:26 +000012955 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012956 if (stream == NULL) {
12957 return (NULL);
12958 }
12959 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000012960 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012961}
12962
12963/**
12964 * xmlCtxtReadMemory:
12965 * @ctxt: an XML parser context
12966 * @buffer: a pointer to a char array
12967 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000012968 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012969 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012970 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012971 *
12972 * parse an XML in-memory document and build a tree.
12973 * This reuses the existing @ctxt parser context
12974 *
12975 * Returns the resulting document tree
12976 */
12977xmlDocPtr
12978xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000012979 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000012980{
12981 xmlParserInputBufferPtr input;
12982 xmlParserInputPtr stream;
12983
12984 if (ctxt == NULL)
12985 return (NULL);
12986 if (buffer == NULL)
12987 return (NULL);
12988
12989 xmlCtxtReset(ctxt);
12990
12991 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
12992 if (input == NULL) {
12993 return(NULL);
12994 }
12995
12996 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12997 if (stream == NULL) {
12998 xmlFreeParserInputBuffer(input);
12999 return(NULL);
13000 }
13001
13002 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013003 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013004}
13005
13006/**
13007 * xmlCtxtReadFd:
13008 * @ctxt: an XML parser context
13009 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000013010 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013011 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013012 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013013 *
13014 * parse an XML from a file descriptor and build a tree.
13015 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013016 * NOTE that the file descriptor will not be closed when the
13017 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013018 *
13019 * Returns the resulting document tree
13020 */
13021xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000013022xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
13023 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013024{
13025 xmlParserInputBufferPtr input;
13026 xmlParserInputPtr stream;
13027
13028 if (fd < 0)
13029 return (NULL);
13030 if (ctxt == NULL)
13031 return (NULL);
13032
13033 xmlCtxtReset(ctxt);
13034
13035
13036 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
13037 if (input == NULL)
13038 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000013039 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013040 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13041 if (stream == NULL) {
13042 xmlFreeParserInputBuffer(input);
13043 return (NULL);
13044 }
13045 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013046 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013047}
13048
13049/**
13050 * xmlCtxtReadIO:
13051 * @ctxt: an XML parser context
13052 * @ioread: an I/O read function
13053 * @ioclose: an I/O close function
13054 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000013055 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013056 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013057 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013058 *
13059 * parse an XML document from I/O functions and source and build a tree.
13060 * This reuses the existing @ctxt parser context
13061 *
13062 * Returns the resulting document tree
13063 */
13064xmlDocPtr
13065xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
13066 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000013067 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013068 const char *encoding, int options)
13069{
13070 xmlParserInputBufferPtr input;
13071 xmlParserInputPtr stream;
13072
13073 if (ioread == NULL)
13074 return (NULL);
13075 if (ctxt == NULL)
13076 return (NULL);
13077
13078 xmlCtxtReset(ctxt);
13079
13080 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
13081 XML_CHAR_ENCODING_NONE);
13082 if (input == NULL)
13083 return (NULL);
13084 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
13085 if (stream == NULL) {
13086 xmlFreeParserInputBuffer(input);
13087 return (NULL);
13088 }
13089 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000013090 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013091}